From ad53978e39e0854f25d0255e266a4746f0cf123b Mon Sep 17 00:00:00 2001 From: ssdeanx Date: Fri, 6 Mar 2026 10:35:02 -0500 Subject: [PATCH] feat: add new fetch tool and related components - Introduced a new fetch tool in `src/mastra/tools/fetch.tool.ts` for web content fetching and markdown conversion. - Implemented various search functionalities including DuckDuckGo, Google, and Bing search. - Added support for Google News RSS fetching. - Included URL validation and sanitization to ensure safe fetching. - Created a new React component for MCP A2A page in `app/chat/mcp-a2a/page.tsx` to manage MCP servers and tools. - Developed a workspace management page in `app/chat/workspaces/page.tsx` to handle file browsing and skill display. - Updated exports in `src/mastra/tools/index.ts` to include the new fetch tool. --- .github/copilot-instructions.md | 8 + app/chat/components/main-sidebar.tsx | 30 +- app/chat/mcp-a2a/page.tsx | 117 +++ app/chat/workspaces/page.tsx | 214 +++++ lib/AGENTS.md | 7 + lib/hooks/use-mastra-query.ts | 558 +++++++++++ memory-bank/activeContext.md | 14 + memory-bank/progress.md | 16 + src/components/ai-elements/tools/types.ts | 2 + src/mastra/agents/researchAgent.ts | 4 +- src/mastra/tools/fetch.tool.ts | 1063 +++++++++++++++++++++ src/mastra/tools/index.ts | 2 + 12 files changed, 2030 insertions(+), 5 deletions(-) create mode 100644 app/chat/mcp-a2a/page.tsx create mode 100644 app/chat/workspaces/page.tsx create mode 100644 src/mastra/tools/fetch.tool.ts diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 9b4d2b72..cd0ddd27 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -22,6 +22,14 @@ applyTo: '**' - This tool will help you identify issues and suggest fixes. - This is especially useful for debugging and improving code quality. - Try run it before writing new code & after completing so you can ensure everything works correctly. 
+- 🧪 When editing a page/component (especially `app/**/page.tsx`), use VS Code interaction error checks (`get_errors` / `#problems`) on the edited files before and after changes. +- ⚙️ Internal error-tool enable flow (required for page edits): + - 1) Activate VS Code interaction tools. + - 2) Run `get_errors` on the exact files being edited (not project-wide). + - 3) Fix reported issues. + - 4) Run `get_errors` again on those same files to verify clean state. +- 🌐 When unsure about framework/API behavior while editing UI pages, use internet research tools first (`#web`, `#websearch`, or `fetch_webpage`) and then apply fixes. +- 🚫 Do not run project-wide type checks/lint commands by default for page edits. Use targeted `get_errors` checks unless the user explicitly asks for `typecheck`/`lint` runs. - 📌 To update your memory bank, use [#update-memory-bank] tool to add new information. - 🛠 Mastra mcp tools use [#mastradocs], [#mastraChanges], [#mastraexamples] tool. - These tools provide access to Mastra documentation, recent changes, and code examples. diff --git a/app/chat/components/main-sidebar.tsx b/app/chat/components/main-sidebar.tsx index 869ce1d4..03da4b6d 100644 --- a/app/chat/components/main-sidebar.tsx +++ b/app/chat/components/main-sidebar.tsx @@ -33,6 +33,8 @@ import { WorkflowIcon, CpuIcon, ActivityIcon, + FolderTreeIcon, + NetworkIcon, Loader2Icon, PlusIcon, } from 'lucide-react' @@ -216,6 +218,26 @@ export function MainSidebar() { + + handleNavClick('/chat/workspaces')} + > + + Workspaces + + + + + handleNavClick('/chat/mcp-a2a')} + > + + MCP / A2A + + + toggleSection('agents')} @@ -284,12 +306,12 @@ export function MainSidebar() {

{agent.name}

- {!!(agent.provider || agent.modelId) && ( + {!(!(agent.provider ?? agent.modelId)) && (
- {agent.provider && `${agent.provider} โ€ข `} + {(Boolean(agent.provider)) && `${agent.provider} โ€ข `} {agent.modelId}
@@ -299,7 +321,7 @@ export function MainSidebar() { {/* Description section */}
- {agent.description ? ( + {(agent.description) ? (
Capabilities @@ -309,7 +331,7 @@ export function MainSidebar() {

) : ( -

+

Specialized AI assistant ready to help with your task.

)} diff --git a/app/chat/mcp-a2a/page.tsx b/app/chat/mcp-a2a/page.tsx new file mode 100644 index 00000000..7ac35075 --- /dev/null +++ b/app/chat/mcp-a2a/page.tsx @@ -0,0 +1,117 @@ +'use client' + +import { useState } from 'react' +import { useMastraQuery } from '@/lib/hooks/use-mastra-query' + +export default function McpA2APage() { + const { + useMcpServers, + useMcpServerTools, + useAgents, + useA2ACard, + } = useMastraQuery() + + const serversResult = useMcpServers({ page: 0, perPage: 50 }) + const servers = serversResult.data?.servers ?? [] + + const [selectedServerId, setSelectedServerId] = useState('') + const activeServerId = selectedServerId || servers[0]?.id || '' + + const toolsResult = useMcpServerTools(activeServerId) + const serverTools = toolsResult.data?.tools ?? [] + + const agentsResult = useAgents() + const agents = agentsResult.data ?? [] + + const [selectedAgentId, setSelectedAgentId] = useState('') + const activeAgentId = selectedAgentId || agents[0]?.id || '' + + const a2aCardResult = useA2ACard(activeAgentId) + const a2aCard = a2aCardResult.data + + return ( +
+

MCP / A2A

+ +
+
+
+

MCP Servers & Tools

+ +
+ +
    + {serverTools.length === 0 ? ( +
  • No MCP tools found.
  • + ) : ( + serverTools.map((tool: { id: string; description?: string }) => ( +
  • +
    {tool.id}
    + {typeof tool.description === 'string' && tool.description.trim().length > 0 ? ( +

    {tool.description}

    + ) : null} +
  • + )) + )} +
+
+ +
+
+

A2A Agent Card

+ +
+ + {!a2aCard ? ( +

No A2A card available.

+ ) : ( +
+
+
Agent
+
{a2aCard.name ?? activeAgentId}
+
+
+
Description
+
{a2aCard.description ?? 'N/A'}
+
+
+
Skills
+
    + {(a2aCard.skills ?? []).map((skill: { id?: string; name?: string }, idx: number) => ( +
  • + {skill.name ?? skill.id ?? 'Unnamed skill'} +
  • + ))} +
+
+
+ )} +
+
+
+ ) +} diff --git a/app/chat/workspaces/page.tsx b/app/chat/workspaces/page.tsx new file mode 100644 index 00000000..de42cbdb --- /dev/null +++ b/app/chat/workspaces/page.tsx @@ -0,0 +1,214 @@ +'use client' + +import { useMemo, useState } from 'react' +import { useMastraQuery } from '@/lib/hooks/use-mastra-query' +import { + FileTree, + FileTreeFile, + FileTreeFolder, +} from '@/src/components/ai-elements/file-tree' +import { + CodeBlock, + CodeBlockActions, + CodeBlockCopyButton, + CodeBlockHeader, + CodeBlockTitle, +} from '@/src/components/ai-elements/code-block' +import type { BundledLanguage } from 'shiki' + +interface WorkspaceFileNode { + path: string + name: string + isDirectory: boolean +} + +const toLanguage = (path: string): BundledLanguage => { + const ext = path.split('.').pop()?.toLowerCase() ?? '' + if (ext === 'ts' || ext === 'tsx') { + return 'typescript' + } + if (ext === 'js' || ext === 'jsx') { + return 'javascript' + } + if (ext === 'json') { + return 'json' + } + if (ext === 'md') { + return 'markdown' + } + if (ext === 'yml' || ext === 'yaml') { + return 'yaml' + } + if (ext === 'py') { + return 'python' + } + if (ext === 'css') { + return 'css' + } + if (ext === 'html') { + return 'html' + } + return 'markdown' +} + +const splitNodes = (files: WorkspaceFileNode[]) => { + const folders = files.filter((f) => f.isDirectory) + const plainFiles = files.filter((f) => !f.isDirectory) + return { folders, plainFiles } +} + +export default function WorkspacesPage() { + const { + useWorkspaces, + useSandboxFiles, + useSandboxReadFile, + useWorkspaceSkills, + } = useMastraQuery() + + const workspacesResult = useWorkspaces() + const workspaces = workspacesResult.data?.workspaces ?? 
[] + + const [selectedWorkspaceId, setSelectedWorkspaceId] = useState('') + const activeWorkspaceId = selectedWorkspaceId || workspaces[0]?.id || '' + + const filesResult = useSandboxFiles(activeWorkspaceId, '/', true) + const fileNodes = useMemo(() => { + const payload = filesResult.data as unknown + if (typeof payload !== 'object' || payload === null) { + return [] as WorkspaceFileNode[] + } + + const recordPayload = payload as Record + const candidate = + (recordPayload.entries as unknown[]) ?? + (recordPayload.items as unknown[]) ?? + (recordPayload.files as unknown[]) ?? + [] + + if (!Array.isArray(candidate)) { + return [] as WorkspaceFileNode[] + } + + return candidate + .filter((item) => typeof item === 'object' && item !== null) + .map((item) => { + const rec = item as Record + const path = + typeof rec.path === 'string' && rec.path.length > 0 + ? rec.path + : typeof rec.name === 'string' + ? rec.name + : '' + const name = + typeof rec.name === 'string' && rec.name.length > 0 + ? rec.name + : path.split('/').filter(Boolean).pop() ?? path + const isDirectory = + rec.isDirectory === true || rec.type === 'directory' || rec.type === 'dir' + + return { + path, + name, + isDirectory, + } + }) + .filter((node) => node.path.length > 0) + }, [filesResult.data]) + + const [selectedFilePath, setSelectedFilePath] = useState('') + const readFileResult = useSandboxReadFile( + activeWorkspaceId, + selectedFilePath, + 'utf-8' + ) + + const skillsResult = useWorkspaceSkills(activeWorkspaceId) + const skills = skillsResult.data?.skills ?? [] + + const selectedContent = + (readFileResult.data as { content?: string } | undefined)?.content ?? '' + + const { folders, plainFiles } = useMemo(() => splitNodes(fileNodes), [fileNodes]) + + return ( +
+
+

Workspaces

+
+ + +
+
+ +
+
+

Workspace Files (Sandbox)

+ { + if (typeof value === 'string') { + setSelectedFilePath(value) + } + }} + > + {folders.map((folder) => ( + + ))} + {plainFiles.map((file) => ( + + ))} + +
+ +
+ + + {selectedFilePath || 'No file selected'} + + + + + +
+ +
+

Workspace Skills

+
    + {skills.length === 0 ? ( +
  • No skills found.
  • + ) : ( + skills.map((skill, idx) => ( +
  • +
    {skill.name ?? `Skill ${idx + 1}`}
    + {typeof skill.description === 'string' && skill.description.trim().length > 0 ? ( +

    {skill.description}

    + ) : null} +
  • + )) + )} +
+
+
+
+ ) +} diff --git a/lib/AGENTS.md b/lib/AGENTS.md index 0c690ddf..0127129b 100644 --- a/lib/AGENTS.md +++ b/lib/AGENTS.md @@ -20,3 +20,10 @@ The `lib/` directory serves as the frontend client library layer for AgentStack, - **`api.ts`**: Contains typed API client functions that wrap the underlying fetch calls to the Mastra backend. - **`utils.ts`**: Shared UI and logic utilities, including `cn` for Tailwind class merging and date formatting helpers. - **`a2a.ts` & `auth.ts`**: Utilities for Agent-to-Agent coordination and authentication management. + +## Recent Update (2026-03-05) + +- `hooks/use-mastra-query.ts` was expanded with workspace/sandbox UI hooks aligned to `@mastra/client-js` Workspace APIs: + - Queries: `useWorkspaceInfo`, `useWorkspaceFiles`, `useWorkspaceReadFile`, `useWorkspaceStat`, `useWorkspaceSearch`, `useWorkspaceSkills`, `useWorkspaceSearchSkills` + - Mutations: `useWorkspaceWriteFileMutation`, `useWorkspaceDeleteMutation`, `useWorkspaceMkdirMutation`, `useWorkspaceIndexMutation` +- Added granular workspace query keys to support frontend cache invalidation after file/index mutations. 
diff --git a/lib/hooks/use-mastra-query.ts b/lib/hooks/use-mastra-query.ts index e176d27a..beeeec2d 100644 --- a/lib/hooks/use-mastra-query.ts +++ b/lib/hooks/use-mastra-query.ts @@ -20,16 +20,41 @@ import type { CreateMemoryThreadParams, GetLogsParams, GetMemoryConfigParams, + ListSkillsResponse, ListMemoryThreadMessagesParams, + McpServerListResponse, + McpServerToolListResponse, ListScoresByEntityIdParams, ListScoresByRunIdParams, ListScoresByScorerIdParams, ListStoredAgentsParams, QueryVectorParams, + SearchSkillsParams, + SearchSkillsResponse, StreamParams, UpdateMemoryThreadParams, UpdateModelParams, + WorkspaceFsDeleteResponse, + WorkspaceFsListResponse, + WorkspaceFsMkdirResponse, + WorkspaceFsReadResponse, + WorkspaceFsStatResponse, + WorkspaceFsWriteResponse, + WorkspaceIndexParams, + WorkspaceIndexResponse, + WorkspaceInfoResponse, + WorkspaceSearchParams, + WorkspaceSearchResponse, } from '@mastra/client-js' +import type { + AgentCard, + GetTaskResponse, + MessageSendParams, + SendMessageResponse, + Task, + TaskQueryParams, +} from '@mastra/core/a2a' +import type { ServerDetailInfo } from '@mastra/core/mcp' import type { TracingOptions } from '@mastra/core/observability' import { RequestContext } from '@mastra/core/request-context' import type { ListTracesArgs } from '@mastra/core/storage' @@ -79,6 +104,155 @@ interface MastraQueryHooks { { instructions: string; comment: string }, unknown > + useWorkspaces: () => UseQueryResult<{ workspaces: WorkspaceItem[] }, Error> + useWorkspace: (id: string) => UseQueryResult + useWorkspaceInfo: (id: string) => UseQueryResult + useWorkspaceFiles: ( + workspaceId: string, + path?: string, + recursive?: boolean + ) => UseQueryResult + useWorkspaceReadFile: ( + workspaceId: string, + path: string, + encoding?: string + ) => UseQueryResult + useWorkspaceStat: ( + workspaceId: string, + path: string + ) => UseQueryResult + useWorkspaceSearch: ( + workspaceId: string, + params: WorkspaceSearchParams + ) => UseQueryResult 
+ useWorkspaceSkills: ( + workspaceId: string + ) => UseQueryResult + useWorkspaceSearchSkills: ( + workspaceId: string, + params: SearchSkillsParams + ) => UseQueryResult + useWorkspaceWriteFileMutation: ( + workspaceId: string + ) => UseMutationResult< + WorkspaceFsWriteResponse, + Error, + { + path: string + content: string + options?: { encoding?: 'utf-8' | 'base64'; recursive?: boolean } + }, + unknown + > + useWorkspaceDeleteMutation: ( + workspaceId: string + ) => UseMutationResult< + WorkspaceFsDeleteResponse, + Error, + { path: string; options?: { recursive?: boolean; force?: boolean } }, + unknown + > + useWorkspaceMkdirMutation: ( + workspaceId: string + ) => UseMutationResult< + WorkspaceFsMkdirResponse, + Error, + { path: string; recursive?: boolean }, + unknown + > + useWorkspaceIndexMutation: ( + workspaceId: string + ) => UseMutationResult + useSandboxInfo: ( + workspaceId: string + ) => UseQueryResult + useSandboxFiles: ( + workspaceId: string, + path?: string, + recursive?: boolean + ) => UseQueryResult + useSandboxReadFile: ( + workspaceId: string, + path: string, + encoding?: string + ) => UseQueryResult + useSandboxStat: ( + workspaceId: string, + path: string + ) => UseQueryResult + useSandboxSearch: ( + workspaceId: string, + params: WorkspaceSearchParams + ) => UseQueryResult + useSandboxWriteFileMutation: ( + workspaceId: string + ) => UseMutationResult< + WorkspaceFsWriteResponse, + Error, + { + path: string + content: string + options?: { encoding?: 'utf-8' | 'base64'; recursive?: boolean } + }, + unknown + > + useSandboxDeleteMutation: ( + workspaceId: string + ) => UseMutationResult< + WorkspaceFsDeleteResponse, + Error, + { path: string; options?: { recursive?: boolean; force?: boolean } }, + unknown + > + useSandboxMkdirMutation: ( + workspaceId: string + ) => UseMutationResult< + WorkspaceFsMkdirResponse, + Error, + { path: string; recursive?: boolean }, + unknown + > + useSandboxIndexMutation: ( + workspaceId: string + ) => 
UseMutationResult + useMcpServers: (params?: { + page?: number + perPage?: number + offset?: number + limit?: number + }) => UseQueryResult + useMcpServerDetails: ( + serverId: string, + params?: { version?: string } + ) => UseQueryResult + useMcpServerTools: ( + serverId: string + ) => UseQueryResult + useMcpToolDetails: ( + serverId: string, + toolId: string, + requestContext?: RequestContext | RequestContextValue + ) => UseQueryResult + useMcpToolExecuteMutation: ( + serverId: string, + toolId: string + ) => UseMutationResult< + unknown, + Error, + { data?: unknown; requestContext?: RequestContext }, + unknown + > + useA2ACard: (agentId: string) => UseQueryResult + useA2ASendMessageMutation: ( + agentId: string + ) => UseMutationResult + useA2AGetTask: ( + agentId: string, + params: TaskQueryParams + ) => UseQueryResult + useA2ACancelTaskMutation: ( + agentId: string + ) => UseMutationResult } /** @@ -215,6 +389,68 @@ export const mastraQueryKeys = { list: () => [...mastraQueryKeys.workspaces.all, 'list'] as const, details: (id: string) => [...mastraQueryKeys.workspaces.all, 'details', id] as const, + info: (id: string) => + [...mastraQueryKeys.workspaces.all, 'info', id] as const, + files: (workspaceId: string, path: string, recursive: boolean) => + [ + ...mastraQueryKeys.workspaces.all, + 'files', + workspaceId, + path, + recursive, + ] as const, + file: (workspaceId: string, path: string, encoding?: string) => + [...mastraQueryKeys.workspaces.all, 'file', workspaceId, path, encoding] as const, + stat: (workspaceId: string, path: string) => + [...mastraQueryKeys.workspaces.all, 'stat', workspaceId, path] as const, + search: (workspaceId: string, params: WorkspaceSearchParams) => + [...mastraQueryKeys.workspaces.all, 'search', workspaceId, params] as const, + skills: (workspaceId: string) => + [...mastraQueryKeys.workspaces.all, 'skills', workspaceId] as const, + searchSkills: (workspaceId: string, params: SearchSkillsParams) => + [ + 
...mastraQueryKeys.workspaces.all, + 'searchSkills', + workspaceId, + params, + ] as const, + }, + sandbox: { + all: ['mastra', 'sandbox'] as const, + info: (workspaceId: string) => + [...mastraQueryKeys.sandbox.all, 'info', workspaceId] as const, + files: (workspaceId: string, path: string, recursive: boolean) => + [...mastraQueryKeys.sandbox.all, 'files', workspaceId, path, recursive] as const, + file: (workspaceId: string, path: string, encoding?: string) => + [...mastraQueryKeys.sandbox.all, 'file', workspaceId, path, encoding] as const, + stat: (workspaceId: string, path: string) => + [...mastraQueryKeys.sandbox.all, 'stat', workspaceId, path] as const, + search: (workspaceId: string, params: WorkspaceSearchParams) => + [...mastraQueryKeys.sandbox.all, 'search', workspaceId, params] as const, + }, + mcp: { + all: ['mastra', 'mcp'] as const, + servers: (params?: unknown) => + [...mastraQueryKeys.mcp.all, 'servers', params] as const, + serverDetails: (serverId: string, params?: unknown) => + [...mastraQueryKeys.mcp.all, 'serverDetails', serverId, params] as const, + serverTools: (serverId: string) => + [...mastraQueryKeys.mcp.all, 'serverTools', serverId] as const, + toolDetails: (serverId: string, toolId: string, params?: unknown) => + [ + ...mastraQueryKeys.mcp.all, + 'toolDetails', + serverId, + toolId, + params, + ] as const, + }, + a2a: { + all: ['mastra', 'a2a'] as const, + card: (agentId: string) => + [...mastraQueryKeys.a2a.all, 'card', agentId] as const, + task: (agentId: string, params: unknown) => + [...mastraQueryKeys.a2a.all, 'task', agentId, params] as const, }, } @@ -656,6 +892,299 @@ export function useMastraQuery(): MastraQueryHooks { enabled: !!id, }) + const useWorkspaceInfo = (id: string) => + useQuery({ + queryKey: mastraQueryKeys.workspaces.info(id), + queryFn: () => mastraClient.getWorkspace(id).info(), + enabled: !!id, + }) + + const useWorkspaceFiles = ( + workspaceId: string, + path = '/', + recursive = false + ) => + useQuery({ + 
queryKey: mastraQueryKeys.workspaces.files(workspaceId, path, recursive), + queryFn: () => mastraClient.getWorkspace(workspaceId).listFiles(path, recursive), + enabled: !!workspaceId, + }) + + const useWorkspaceReadFile = ( + workspaceId: string, + path: string, + encoding = 'utf-8' + ) => + useQuery({ + queryKey: mastraQueryKeys.workspaces.file(workspaceId, path, encoding), + queryFn: () => mastraClient.getWorkspace(workspaceId).readFile(path, encoding), + enabled: !!workspaceId && !!path, + }) + + const useWorkspaceStat = (workspaceId: string, path: string) => + useQuery({ + queryKey: mastraQueryKeys.workspaces.stat(workspaceId, path), + queryFn: () => mastraClient.getWorkspace(workspaceId).stat(path), + enabled: !!workspaceId && !!path, + }) + + const useWorkspaceSearch = (workspaceId: string, params: WorkspaceSearchParams) => + useQuery({ + queryKey: mastraQueryKeys.workspaces.search(workspaceId, params), + queryFn: () => mastraClient.getWorkspace(workspaceId).search(params), + enabled: !!workspaceId && !!params?.query, + }) + + const useWorkspaceSkills = (workspaceId: string) => + useQuery({ + queryKey: mastraQueryKeys.workspaces.skills(workspaceId), + queryFn: () => mastraClient.getWorkspace(workspaceId).listSkills(), + enabled: !!workspaceId, + }) + + const useWorkspaceSearchSkills = ( + workspaceId: string, + params: SearchSkillsParams + ) => + useQuery({ + queryKey: mastraQueryKeys.workspaces.searchSkills(workspaceId, params), + queryFn: () => mastraClient.getWorkspace(workspaceId).searchSkills(params), + enabled: !!workspaceId, + }) + + // Workspace Mutations + const useWorkspaceWriteFileMutation = (workspaceId: string) => + useMutation({ + mutationFn: (params: { + path: string + content: string + options?: { encoding?: 'utf-8' | 'base64'; recursive?: boolean } + }) => + mastraClient + .getWorkspace(workspaceId) + .writeFile(params.path, params.content, params.options), + onSuccess: async () => { + await queryClient.invalidateQueries({ + queryKey: 
mastraQueryKeys.workspaces.all, + }) + }, + }) + + const useWorkspaceDeleteMutation = (workspaceId: string) => + useMutation({ + mutationFn: (params: { + path: string + options?: { recursive?: boolean; force?: boolean } + }) => + mastraClient + .getWorkspace(workspaceId) + .delete(params.path, params.options), + onSuccess: async () => { + await queryClient.invalidateQueries({ + queryKey: mastraQueryKeys.workspaces.all, + }) + }, + }) + + const useWorkspaceMkdirMutation = (workspaceId: string) => + useMutation({ + mutationFn: (params: { path: string; recursive?: boolean }) => + mastraClient + .getWorkspace(workspaceId) + .mkdir(params.path, params.recursive), + onSuccess: async () => { + await queryClient.invalidateQueries({ + queryKey: mastraQueryKeys.workspaces.all, + }) + }, + }) + + const useWorkspaceIndexMutation = (workspaceId: string) => + useMutation({ + mutationFn: (params: WorkspaceIndexParams) => + mastraClient.getWorkspace(workspaceId).index(params), + onSuccess: async () => { + await queryClient.invalidateQueries({ + queryKey: mastraQueryKeys.workspaces.all, + }) + }, + }) + + // --- SANDBOX (separate frontend hooks) --- + + const useSandboxInfo = (workspaceId: string) => + useQuery({ + queryKey: mastraQueryKeys.sandbox.info(workspaceId), + queryFn: () => mastraClient.getWorkspace(workspaceId).info(), + enabled: !!workspaceId, + }) + + const useSandboxFiles = ( + workspaceId: string, + path = '/', + recursive = false + ) => + useQuery({ + queryKey: mastraQueryKeys.sandbox.files(workspaceId, path, recursive), + queryFn: () => mastraClient.getWorkspace(workspaceId).listFiles(path, recursive), + enabled: !!workspaceId, + }) + + const useSandboxReadFile = ( + workspaceId: string, + path: string, + encoding = 'utf-8' + ) => + useQuery({ + queryKey: mastraQueryKeys.sandbox.file(workspaceId, path, encoding), + queryFn: () => mastraClient.getWorkspace(workspaceId).readFile(path, encoding), + enabled: !!workspaceId && !!path, + }) + + const useSandboxStat = 
(workspaceId: string, path: string) => + useQuery({ + queryKey: mastraQueryKeys.sandbox.stat(workspaceId, path), + queryFn: () => mastraClient.getWorkspace(workspaceId).stat(path), + enabled: !!workspaceId && !!path, + }) + + const useSandboxSearch = (workspaceId: string, params: WorkspaceSearchParams) => + useQuery({ + queryKey: mastraQueryKeys.sandbox.search(workspaceId, params), + queryFn: () => mastraClient.getWorkspace(workspaceId).search(params), + enabled: !!workspaceId && !!params?.query, + }) + + const useSandboxWriteFileMutation = (workspaceId: string) => + useMutation({ + mutationFn: (params: { + path: string + content: string + options?: { encoding?: 'utf-8' | 'base64'; recursive?: boolean } + }) => + mastraClient + .getWorkspace(workspaceId) + .writeFile(params.path, params.content, params.options), + onSuccess: async () => { + await queryClient.invalidateQueries({ queryKey: mastraQueryKeys.sandbox.all }) + }, + }) + + const useSandboxDeleteMutation = (workspaceId: string) => + useMutation({ + mutationFn: (params: { + path: string + options?: { recursive?: boolean; force?: boolean } + }) => + mastraClient + .getWorkspace(workspaceId) + .delete(params.path, params.options), + onSuccess: async () => { + await queryClient.invalidateQueries({ queryKey: mastraQueryKeys.sandbox.all }) + }, + }) + + const useSandboxMkdirMutation = (workspaceId: string) => + useMutation({ + mutationFn: (params: { path: string; recursive?: boolean }) => + mastraClient.getWorkspace(workspaceId).mkdir(params.path, params.recursive), + onSuccess: async () => { + await queryClient.invalidateQueries({ queryKey: mastraQueryKeys.sandbox.all }) + }, + }) + + const useSandboxIndexMutation = (workspaceId: string) => + useMutation({ + mutationFn: (params: WorkspaceIndexParams) => + mastraClient.getWorkspace(workspaceId).index(params), + onSuccess: async () => { + await queryClient.invalidateQueries({ queryKey: mastraQueryKeys.sandbox.all }) + }, + }) + + // --- MCP --- + + const 
useMcpServers = (params?: { + page?: number + perPage?: number + offset?: number + limit?: number + }) => + useQuery({ + queryKey: mastraQueryKeys.mcp.servers(params), + queryFn: () => mastraClient.getMcpServers(params), + }) + + const useMcpServerDetails = ( + serverId: string, + params?: { version?: string } + ) => + useQuery({ + queryKey: mastraQueryKeys.mcp.serverDetails(serverId, params), + queryFn: () => mastraClient.getMcpServerDetails(serverId, params), + enabled: !!serverId, + }) + + const useMcpServerTools = (serverId: string) => + useQuery({ + queryKey: mastraQueryKeys.mcp.serverTools(serverId), + queryFn: () => mastraClient.getMcpServerTools(serverId), + enabled: !!serverId, + }) + + const useMcpToolDetails = ( + serverId: string, + toolId: string, + requestContext?: RequestContext | RequestContextValue + ) => + useQuery({ + queryKey: mastraQueryKeys.mcp.toolDetails(serverId, toolId, requestContext), + queryFn: () => + mastraClient + .getMcpServerTool(serverId, toolId) + .details(requestContext as RequestContext), + enabled: !!serverId && !!toolId, + }) + + const useMcpToolExecuteMutation = (serverId: string, toolId: string) => + useMutation({ + mutationFn: (params: { data?: unknown; requestContext?: RequestContext }) => + mastraClient + .getMcpServerTool(serverId, toolId) + .execute({ + data: params.data, + requestContext: params.requestContext, + }), + }) + + // --- A2A --- + + const useA2ACard = (agentId: string) => + useQuery({ + queryKey: mastraQueryKeys.a2a.card(agentId), + queryFn: () => mastraClient.getA2A(agentId).getCard(), + enabled: !!agentId, + }) + + const useA2ASendMessageMutation = (agentId: string) => + useMutation({ + mutationFn: (params: MessageSendParams) => + mastraClient.getA2A(agentId).sendMessage(params), + }) + + const useA2AGetTask = (agentId: string, params: TaskQueryParams) => + useQuery({ + queryKey: mastraQueryKeys.a2a.task(agentId, params), + queryFn: () => mastraClient.getA2A(agentId).getTask(params), + enabled: !!agentId 
&& !!params?.id, + }) + + const useA2ACancelTaskMutation = (agentId: string) => + useMutation({ + mutationFn: (params: TaskQueryParams) => + mastraClient.getA2A(agentId).cancelTask(params), + }) + // --- SYSTEM --- const useSystemPackages = () => @@ -919,6 +1448,24 @@ export function useMastraQuery(): MastraQueryHooks { useVectorDetails, useWorkspaces, useWorkspace, + useWorkspaceInfo, + useWorkspaceFiles, + useWorkspaceReadFile, + useWorkspaceStat, + useWorkspaceSearch, + useWorkspaceSkills, + useWorkspaceSearchSkills, + useSandboxInfo, + useSandboxFiles, + useSandboxReadFile, + useSandboxStat, + useSandboxSearch, + useMcpServers, + useMcpServerDetails, + useMcpServerTools, + useMcpToolDetails, + useA2ACard, + useA2AGetTask, useSystemPackages, // Mutations @@ -940,6 +1487,17 @@ export function useMastraQuery(): MastraQueryHooks { useUpdateWorkingMemoryMutation, useVectorQueryMutation, useVectorUpsertMutation, + useWorkspaceWriteFileMutation, + useWorkspaceDeleteMutation, + useWorkspaceMkdirMutation, + useWorkspaceIndexMutation, + useSandboxWriteFileMutation, + useSandboxDeleteMutation, + useSandboxMkdirMutation, + useSandboxIndexMutation, + useMcpToolExecuteMutation, + useA2ASendMessageMutation, + useA2ACancelTaskMutation, useScoreMutation, } diff --git a/memory-bank/activeContext.md b/memory-bank/activeContext.md index 6515993a..d4c83bc6 100644 --- a/memory-bank/activeContext.md +++ b/memory-bank/activeContext.md @@ -1,3 +1,17 @@ +## Active Context Update (2026-03-05 - Workspace/Sandbox Hook Expansion) + +- Mastra workspace docs were reviewed for frontend integration (`local-filesystem`, `local-sandbox`, `sandbox`, `workspace-class`, `workspace/search`). 
+- `lib/hooks/use-mastra-query.ts` now includes workspace-facing hooks for sandbox-style UI workflows: + - Queries: `useWorkspaceInfo`, `useWorkspaceFiles`, `useWorkspaceReadFile`, `useWorkspaceStat`, `useWorkspaceSearch`, `useWorkspaceSkills`, `useWorkspaceSearchSkills` + - Mutations: `useWorkspaceWriteFileMutation`, `useWorkspaceDeleteMutation`, `useWorkspaceMkdirMutation`, `useWorkspaceIndexMutation` +- Added granular query keys under `mastraQueryKeys.workspaces` to support cache-safe UI invalidation after file/index mutations. + +## Active Context Update (2026-03-05 - Hook Error Cleanup) + +- `lib/hooks/use-mastra-query.ts` was corrected after partial MCP/A2A integration left type errors. +- Sandbox was split into its own hook set (`useSandbox*`) instead of helper verification hooks. +- MCP/A2A hooks are now included in the returned hook object and can be consumed in frontend UI components. + ## Active Context Update (2026-02-17 - Landing Components Complete) - All homepage landing sections (`landing-*`) received visual polish and GSAP SVG accent integration. diff --git a/memory-bank/progress.md b/memory-bank/progress.md index 2a036093..8384cc32 100644 --- a/memory-bank/progress.md +++ b/memory-bank/progress.md @@ -47,6 +47,22 @@ # Progress +## 2026-03-05 use-mastra-query error cleanup + +- Fixed `lib/hooks/use-mastra-query.ts` compile issues caused by partially wired hooks. +- Removed the stray helper-style `useWorkspaceSandboxReady` hook. +- Added explicit **separate sandbox hooks** (`useSandboxInfo/files/read/stat/search` + sandbox write/delete/mkdir/index mutations). +- Wired MCP hooks (`useMcpServers`, `useMcpServerDetails`, `useMcpServerTools`, `useMcpToolDetails`, `useMcpToolExecuteMutation`) into returned hook object. +- Wired A2A hooks (`useA2ACard`, `useA2AGetTask`, `useA2ASendMessageMutation`, `useA2ACancelTaskMutation`) into returned hook object. 
+ +## 2026-03-05 Workspace/Sandbox Hooks for Frontend UI + +- Reviewed Mastra Workspace docs/reference pages (`local-filesystem`, `local-sandbox`, `sandbox`, `workspace-class`, `workspace/search`) and mapped to `@mastra/client-js` Workspace API. +- Extended `lib/hooks/use-mastra-query.ts` with frontend-ready hooks for workspace filesystem + search + skills: + - queries: `useWorkspaceInfo`, `useWorkspaceFiles`, `useWorkspaceReadFile`, `useWorkspaceStat`, `useWorkspaceSearch`, `useWorkspaceSkills`, `useWorkspaceSearchSkills` + - mutations: `useWorkspaceWriteFileMutation`, `useWorkspaceDeleteMutation`, `useWorkspaceMkdirMutation`, `useWorkspaceIndexMutation` +- Added new `mastraQueryKeys.workspaces.*` keys for granular cache invalidation and stable UI refresh. + ## Dashboard + Public Data Hardening **[Synced 2026-02-17]** - Removed unsafe dashboard casts in key routes/components: diff --git a/src/components/ai-elements/tools/types.ts b/src/components/ai-elements/tools/types.ts index b0920528..024a72b3 100644 --- a/src/components/ai-elements/tools/types.ts +++ b/src/components/ai-elements/tools/types.ts @@ -48,6 +48,7 @@ import type { execaTool, extractLearningsTool, extractTablesTool, + fetchTool, fillFormTool, findFreeSlots, findReferencesTool, @@ -194,6 +195,7 @@ export type ExcalidrawToSVGUITool = InferUITool export type ExecaUITool = InferUITool export type ExtractLearningsUITool = InferUITool export type ExtractTablesUITool = InferUITool +export type FetchUITool = InferUITool export type FillFormUITool = InferUITool export type FindFreeSlotsUITool = InferUITool export type FindReferencesUITool = InferUITool diff --git a/src/mastra/agents/researchAgent.ts b/src/mastra/agents/researchAgent.ts index ae6461bd..062cb3ce 100644 --- a/src/mastra/agents/researchAgent.ts +++ b/src/mastra/agents/researchAgent.ts @@ -9,6 +9,7 @@ import { pgMemory } from '../config/pg-storage' import { mdocumentChunker } from '../tools/document-chunking.tool' import { evaluateResultTool } 
from '../tools/evaluateResultTool' import { extractLearningsTool } from '../tools/extractLearningsTool' +import { fetchTool } from '../tools/fetch.tool' import { finnhubQuotesTool } from '../tools/finnhub-tools' import { pdfToMarkdownTool } from '../tools/pdf-data-conversion.tool' import { polygonStockQuotesTool } from '../tools/polygon-tools' @@ -67,7 +68,7 @@ Tier: ${userTier} | Lang: ${language} | Phase: ${researchPhase} 5. **Synthesize**: Provide final answer with citations and confidence levels. STOP after Phase 2. ## Tool Selection Guide -- **Web**: 'webScraperTool' (single URL), 'batchWebScraperTool' (multiple). +- **Web**: Prefer 'fetchTool' for reliable URL fetch/search to markdown. Use 'webScraperTool' for selector-based extraction. - **News/Trends**: 'googleNewsTool', 'googleTrendsTool', 'googleFinanceTool'. - **Academic**: 'googleScholarTool'. - **Financial**: Use 'polygon*', 'finnhub*', or 'alphaVantage*' for stocks/crypto. @@ -97,6 +98,7 @@ Tier: ${userTier} | Lang: ${language} | Phase: ${researchPhase} }, tools: { // Core Research Tools + fetchTool, webScraperTool, googleScholarTool, googleNewsLiteTool, diff --git a/src/mastra/tools/fetch.tool.ts b/src/mastra/tools/fetch.tool.ts new file mode 100644 index 00000000..d3acebe1 --- /dev/null +++ b/src/mastra/tools/fetch.tool.ts @@ -0,0 +1,1063 @@ +import { SpanType, getOrCreateSpan } from '@mastra/core/observability' +import type { TracingContext } from '@mastra/core/observability' +import type { RequestContext } from '@mastra/core/request-context' +import type { InferUITool } from '@mastra/core/tools' +import { createTool } from '@mastra/core/tools' +import * as cheerio from 'cheerio' +import { XMLParser } from 'fast-xml-parser' +import { JSDOM } from 'jsdom' +import * as RE2Module from 're2' +import { z } from 'zod' +import { log } from '../config/logger' +import { httpFetch } from '../lib/http-client' + +const RE2Ctor = RE2Module as unknown as new ( + pattern: string, + flags?: string +) => { + test: 
(input: string) => boolean +} + +const DEFAULT_USER_AGENT = + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 AgentStackFetch/1.0' +const TRACKING_PARAM_RE = new RE2Ctor( + '^(utm_[a-z0-9_]+|gclid|fbclid|yclid|mc_eid|mc_cid|igshid|ref|ref_src)$', + 'i' +) + +export interface FetchToolContext extends RequestContext { + userAgent?: string + timeout?: number + userId?: string + workspaceId?: string +} + +class FetchToolError extends Error { + public readonly code: string + public readonly statusCode?: number + public readonly url?: string + + constructor(message: string, code: string, statusCode?: number, url?: string) { + super(message) + this.code = code + this.statusCode = statusCode + this.url = url + this.name = 'FetchToolError' + } +} + +class ValidationUtils { + static validateUrl(url: string): boolean { + try { + const parsed = new URL(url) + return parsed.protocol === 'http:' || parsed.protocol === 'https:' + } catch { + return false + } + } +} + +function buildRequestHeaders(userAgent?: string): Record { + return { + 'user-agent': + typeof userAgent === 'string' && userAgent.trim() !== '' + ? 
userAgent.trim() + : DEFAULT_USER_AGENT, + accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'accept-language': 'en-US,en;q=0.9', + } +} + +function sanitizeHtml(html: string): string { + const dom = new JSDOM(String(html), { + contentType: 'text/html', + includeNodeLocations: false, + }) + + const { document } = dom.window + + const dangerousTags = [ + 'script', + 'style', + 'iframe', + 'object', + 'embed', + 'noscript', + 'form', + 'input', + 'button', + 'select', + 'textarea', + ] + + dangerousTags.forEach((tag) => { + document.querySelectorAll(tag).forEach((el) => el.remove()) + }) + + document.querySelectorAll('*').forEach((el) => { + Array.from(el.attributes).forEach((attr) => { + const name = attr.name.toLowerCase() + if (name.startsWith('on')) { + el.removeAttribute(attr.name) + } + if (name === 'href' && /^\s*javascript:/i.test(attr.value)) { + el.setAttribute('href', '#') + } + }) + }) + + return document.body.innerHTML +} + +function htmlToMarkdown(html: string): string { + const $ = cheerio.load(html) + + $('script,style,iframe,object,embed,noscript').remove() + + const title = $('title').first().text().trim() + const bodyText = $('body').text().replace(/\s+/g, ' ').trim() + + const headings = $('h1,h2,h3') + .toArray() + .map((el) => $(el).text().trim()) + .filter((t) => t.length > 0) + + const links = $('a[href]') + .toArray() + .map((el) => { + const href = $(el).attr('href') ?? '' + const text = $(el).text().trim() || href + return href.trim().length > 0 ? 
`- [${text}](${href})` : '' + }) + .filter((v) => v.length > 0) + + const lines: string[] = [] + + if (title.length > 0) { + lines.push(`# ${title}`, '') + } + + if (headings.length > 0) { + lines.push('## Headings') + headings.forEach((h) => lines.push(`- ${h}`)) + lines.push('') + } + + if (bodyText.length > 0) { + lines.push('## Content', bodyText, '') + } + + if (links.length > 0) { + lines.push('## Links', ...links, '') + } + + return lines.join('\n').trim() +} + +interface SearchResult { + title: string + url: string + snippet?: string +} + +type SearchProvider = 'duckduckgo' | 'google' | 'bing' | 'all' +type SearchVertical = 'web' | 'news' | 'auto' +type ContentWindowMode = 'head' | 'tail' | 'head-tail' + +interface ContentWindowConfig { + minChars: number + maxChars: number + mode: ContentWindowMode +} + +const DEFAULT_CONTENT_WINDOW: ContentWindowConfig = { + minChars: 15000, + maxChars: 75000, + mode: 'head-tail', +} + +function compileRe2Patterns(patterns?: string[]) { + const compiled: Array> = [] + for (const pattern of patterns ?? []) { + try { + if (typeof pattern === 'string' && pattern.trim().length > 0) { + compiled.push(new RE2Ctor(pattern)) + } + } catch (error) { + log.warn('Invalid RE2 pattern ignored', { + pattern, + error: error instanceof Error ? 
error.message : String(error), + }) + } + } + return compiled +} + +function passesRe2Filters( + value: string, + include: Array>, + exclude: Array> +): boolean { + const includePass = include.length === 0 || include.some((re) => re.test(value)) + if (!includePass) { + return false + } + return !exclude.some((re) => re.test(value)) +} + +function dedupeResults(results: SearchResult[]): SearchResult[] { + const seen = new Set() + const output: SearchResult[] = [] + + for (const result of results) { + const normalized = normalizeUrl(result.url) + if (seen.has(normalized)) { + continue + } + seen.add(normalized) + output.push({ ...result, url: normalized }) + } + + return output +} + +function normalizeUrl(rawUrl: string): string { + try { + const parsed = new URL(rawUrl.trim()) + const keptParams = new URLSearchParams() + parsed.searchParams.forEach((value, key) => { + if (!TRACKING_PARAM_RE.test(key)) { + keptParams.append(key, value) + } + }) + parsed.search = keptParams.toString() + parsed.hash = '' + return parsed.toString().trim() + } catch { + return rawUrl.trim() + } +} + +function isNewsQuery(query: string): boolean { + const lowered = query.toLowerCase() + return /\b(news|headline|headlines|breaking|latest|today|update|updates)\b/.test( + lowered + ) +} + +function applyContentWindow( + markdown: string, + window: ContentWindowConfig +): { markdown: string; originalChars: number; outputChars: number; truncated: boolean } { + const source = String(markdown) + const originalChars = source.length + + if (originalChars <= window.maxChars) { + return { + markdown: source, + originalChars, + outputChars: originalChars, + truncated: false, + } + } + + if (window.mode === 'head') { + const sliced = source.slice(0, window.maxChars).trim() + return { + markdown: `${sliced}\n\n---\n_Truncated by content window (head mode)_`, + originalChars, + outputChars: sliced.length, + truncated: true, + } + } + + if (window.mode === 'tail') { + const sliced = 
source.slice(-window.maxChars).trim() + return { + markdown: `_Truncated by content window (tail mode)_\n---\n\n${sliced}`, + originalChars, + outputChars: sliced.length, + truncated: true, + } + } + + const headSize = Math.floor(window.maxChars * 0.7) + const tailSize = window.maxChars - headSize + const head = source.slice(0, headSize).trim() + const tail = source.slice(-tailSize).trim() + const joined = `${head}\n\n---\n_Truncated by content window (head-tail mode)_\n---\n\n${tail}` + + return { + markdown: joined, + originalChars, + outputChars: joined.length, + truncated: true, + } +} + +function resolveContentWindow( + input?: Partial +): ContentWindowConfig { + const minChars = input?.minChars ?? DEFAULT_CONTENT_WINDOW.minChars + const maxChars = input?.maxChars ?? DEFAULT_CONTENT_WINDOW.maxChars + const mode = input?.mode ?? DEFAULT_CONTENT_WINDOW.mode + + if (minChars > maxChars) { + return { + minChars: DEFAULT_CONTENT_WINDOW.minChars, + maxChars: DEFAULT_CONTENT_WINDOW.maxChars, + mode, + } + } + + return { minChars, maxChars, mode } +} + +function extractDuckDuckGoResults(html: string): SearchResult[] { + const $ = cheerio.load(html) + const out: SearchResult[] = [] + + $('a.result__a').each((_i, el) => { + const anchor = $(el) + const title = anchor.text().trim() + const href = anchor.attr('href') ?? '' + if (href.trim().length === 0) { + return + } + + let resolvedUrl = href + try { + const urlObj = new URL(href, 'https://duckduckgo.com') + const uddg = urlObj.searchParams.get('uddg') + resolvedUrl = + typeof uddg === 'string' && uddg.trim().length > 0 + ? 
decodeURIComponent(uddg) + : urlObj.href + } catch { + // Keep original href + } + + const snippet = + anchor.closest('.result').find('.result__snippet').text().trim() || + undefined + + if (ValidationUtils.validateUrl(resolvedUrl)) { + out.push({ title, url: resolvedUrl, snippet }) + } + }) + + return out +} + +function extractGoogleResults(html: string): SearchResult[] { + const $ = cheerio.load(html) + const out: SearchResult[] = [] + + $('a[href^="/url?q="]').each((_i, el) => { + const anchor = $(el) + const href = anchor.attr('href') ?? '' + const title = anchor.text().trim() + if (href.trim().length === 0) { + return + } + + try { + const parsed = new URL(`https://www.google.com${href}`) + const target = parsed.searchParams.get('q') ?? '' + if (!ValidationUtils.validateUrl(target)) { + return + } + + const snippet = + anchor + .closest('div') + .parent() + .find('span,div') + .first() + .text() + .trim() || undefined + + out.push({ + title: title.length > 0 ? title : target, + url: target, + snippet, + }) + } catch { + // Skip malformed results + } + }) + + return out +} + +function extractBingResults(html: string): SearchResult[] { + const $ = cheerio.load(html) + const out: SearchResult[] = [] + + $('li.b_algo').each((_i, el) => { + const node = $(el) + const linkEl = node.find('h2 a').first() + const url = linkEl.attr('href') ?? '' + if (!ValidationUtils.validateUrl(url)) { + return + } + + const title = linkEl.text().trim() + const snippet = node.find('p').first().text().trim() || undefined + + out.push({ + title: title.length > 0 ? 
title : url, + url, + snippet, + }) + }) + + return out +} + +async function searchDuckDuckGo(options: { + query: string + timeout: number + userAgent?: string +}): Promise { + const response = await httpFetch('https://duckduckgo.com/html/', { + method: 'GET', + timeout: options.timeout, + responseType: 'text', + params: { q: options.query }, + headers: buildRequestHeaders(options.userAgent), + }) + + if (!response.ok) { + throw new FetchToolError( + `DuckDuckGo search failed: HTTP ${response.status}`, + 'SEARCH_DDG_HTTP_ERROR', + response.status, + 'https://duckduckgo.com/html/' + ) + } + + return extractDuckDuckGoResults(await response.text()) +} + +async function searchGoogle(options: { + query: string + maxResults: number + timeout: number + userAgent?: string +}): Promise { + const response = await httpFetch('https://www.google.com/search', { + method: 'GET', + timeout: options.timeout, + responseType: 'text', + params: { + q: options.query, + num: options.maxResults, + hl: 'en', + }, + headers: buildRequestHeaders(options.userAgent), + }) + + if (!response.ok) { + throw new FetchToolError( + `Google search failed: HTTP ${response.status}`, + 'SEARCH_GOOGLE_HTTP_ERROR', + response.status, + 'https://www.google.com/search' + ) + } + + return extractGoogleResults(await response.text()) +} + +async function searchBing(options: { + query: string + timeout: number + userAgent?: string +}): Promise { + const response = await httpFetch('https://www.bing.com/search', { + method: 'GET', + timeout: options.timeout, + responseType: 'text', + params: { q: options.query, setlang: 'en' }, + headers: buildRequestHeaders(options.userAgent), + }) + + if (!response.ok) { + throw new FetchToolError( + `Bing search failed: HTTP ${response.status}`, + 'SEARCH_BING_HTTP_ERROR', + response.status, + 'https://www.bing.com/search' + ) + } + + return extractBingResults(await response.text()) +} + +async function searchGoogleNewsRss(options: { + query: string + timeout: number + 
userAgent?: string + maxResults: number +}): Promise { + const response = await httpFetch('https://news.google.com/rss/search', { + method: 'GET', + timeout: options.timeout, + responseType: 'text', + params: { + q: options.query, + hl: 'en-US', + gl: 'US', + ceid: 'US:en', + }, + headers: buildRequestHeaders(options.userAgent), + }) + + if (!response.ok) { + throw new FetchToolError( + `Google News RSS search failed: HTTP ${response.status}`, + 'SEARCH_GOOGLE_NEWS_RSS_HTTP_ERROR', + response.status, + 'https://news.google.com/rss/search' + ) + } + + const xml = await response.text() + const parser = new XMLParser({ + ignoreAttributes: false, + parseTagValue: true, + trimValues: true, + }) + + interface RssItem { + title?: string + link?: string + description?: string + } + interface ParsedRss { + rss?: { + channel?: { + item?: RssItem[] | RssItem + } + } + } + + const parsed = parser.parse(xml) as ParsedRss + const itemsRaw = parsed.rss?.channel?.item + const items = Array.isArray(itemsRaw) + ? itemsRaw + : itemsRaw + ? [itemsRaw] + : [] + + return items + .map((item): SearchResult | null => { + const link = typeof item.link === 'string' ? item.link.trim() : '' + if (!ValidationUtils.validateUrl(link)) { + return null + } + + const description = + typeof item.description === 'string' + ? cheerio.load(`
${item.description}
`)('div') + .text() + .replace(/\s+/g, ' ') + .trim() + : undefined + + const title = + typeof item.title === 'string' && item.title.trim() !== '' + ? item.title.trim() + : link + + return { + title, + url: normalizeUrl(link), + snippet: + typeof description === 'string' && + description.trim().length > 0 + ? description + : undefined, + } + }) + .filter((item): item is SearchResult => item !== null) + .slice(0, options.maxResults) +} + +async function fetchPageAsMarkdown(options: { + url: string + timeout: number + userAgent?: string + contentWindow: ContentWindowConfig +}): Promise<{ + markdown: string + originalChars: number + outputChars: number + truncated: boolean +}> { + const headers: Record = {} + Object.assign(headers, buildRequestHeaders(options.userAgent)) + + const response = await httpFetch(options.url, { + method: 'GET', + timeout: options.timeout, + responseType: 'text', + headers, + }) + + if (!response.ok) { + throw new FetchToolError( + `HTTP ${response.status}: ${response.statusText}`, + 'HTTP_ERROR', + response.status, + options.url + ) + } + + const html = await response.text() + const markdown = htmlToMarkdown(sanitizeHtml(html)) + return applyContentWindow(markdown, options.contentWindow) +} + +const fetchToolInputSchema = z + .object({ + url: z.url().optional().describe('Direct URL to fetch and convert to markdown.'), + query: z.string().min(1).max(500).optional().describe('Search query for web discovery.'), + searchProvider: z + .enum(['duckduckgo', 'google', 'bing', 'all']) + .optional() + .describe('Search backend. No fallback is applied.'), + searchVertical: z + .enum(['web', 'news', 'auto']) + .optional() + .describe( + 'Search vertical. auto detects news-like queries and adds Google News RSS for reliability.' 
+ ), + maxResults: z + .number() + .int() + .min(1) + .max(10) + .optional() + .describe('Maximum search results to return (default: 5).'), + includeContent: z + .boolean() + .optional() + .describe('Fetch markdown for each search result (default: true).'), + timeout: z + .number() + .min(1000) + .max(60000) + .optional() + .describe('HTTP timeout in milliseconds (default: 30000).'), + userAgent: z.string().optional().describe('Optional custom user-agent header.'), + contentContext: z + .object({ + minChars: z + .number() + .int() + .min(1000) + .max(200000) + .optional() + .describe('Minimum preferred markdown context size in characters.'), + maxChars: z + .number() + .int() + .min(5000) + .max(250000) + .optional() + .describe('Maximum markdown context size in characters.'), + mode: z + .enum(['head', 'tail', 'head-tail']) + .optional() + .describe('Truncation mode when content exceeds maxChars.'), + }) + .optional() + .describe( + 'Content window controls for markdown output. Defaults to 15k-75k chars in head-tail mode.' 
+ ), + includeUrlPatterns: z.array(z.string()).optional().describe('RE2 include regex patterns for URLs.'), + excludeUrlPatterns: z.array(z.string()).optional().describe('RE2 exclude regex patterns for URLs.'), + }) + .strict() + .refine( + (v) => + (typeof v.url === 'string' && v.url.trim().length > 0) || + (typeof v.query === 'string' && v.query.trim().length > 0), + { + message: 'Either url or query must be provided.', + path: ['url'], + } + ) + +const fetchToolOutputSchema = z + .object({ + mode: z.enum(['url', 'search']), + query: z.string().optional(), + url: z.string().optional(), + markdown: z.string(), + results: z + .array( + z.object({ + title: z.string().optional(), + url: z.string(), + snippet: z.string().optional(), + markdown: z.string().optional(), + }) + ) + .optional(), + metadata: z.object({ + fetchedAt: z.string(), + source: z.string(), + provider: z.string().optional(), + vertical: z.string().optional(), + totalResults: z.number().optional(), + originalChars: z.number().optional(), + outputChars: z.number().optional(), + truncated: z.boolean().optional(), + contentWindow: z + .object({ + minChars: z.number(), + maxChars: z.number(), + mode: z.enum(['head', 'tail', 'head-tail']), + }) + .optional(), + providerDiagnostics: z.record(z.string(), z.string()).optional(), + }), + }) + .strict() + +export const fetchTool = createTool({ + id: 'fetch', + description: + 'Production fetch/search tool with RE2 filtering and markdown output. 
No fallback, no file writes.', + inputSchema: fetchToolInputSchema, + outputSchema: fetchToolOutputSchema, + onInputStart: ({ toolCallId, messages, abortSignal }) => { + log.info('Fetch tool input streaming started', { + toolCallId, + messageCount: messages.length, + abortSignal: abortSignal?.aborted, + hook: 'onInputStart', + }) + }, + onInputDelta: ({ inputTextDelta, toolCallId, messages, abortSignal }) => { + log.info('Fetch tool received input chunk', { + toolCallId, + inputTextDelta, + messageCount: messages.length, + abortSignal: abortSignal?.aborted, + hook: 'onInputDelta', + }) + }, + onInputAvailable: ({ input, toolCallId, messages, abortSignal }) => { + log.info('Fetch tool input available', { + toolCallId, + messageCount: messages.length, + abortSignal: abortSignal?.aborted, + url: input.url, + query: input.query, + searchProvider: input.searchProvider, + searchVertical: input.searchVertical, + contentContext: input.contentContext, + hook: 'onInputAvailable', + }) + }, + execute: async (inputData, context) => { + const writer = context?.writer + const abortSignal = context?.abortSignal + const requestContext = context?.requestContext as FetchToolContext + const tracingContext: TracingContext | undefined = + context?.tracingContext + + if (abortSignal?.aborted ?? false) { + throw new Error('Fetch tool cancelled') + } + + await writer?.custom({ + type: 'data-tool-progress', + data: { + status: 'in-progress', + message: '๐ŸŒ Starting fetch...', + stage: 'fetch', + }, + id: 'fetch', + }) + + const span = getOrCreateSpan({ + type: SpanType.TOOL_CALL, + name: 'fetch', + input: inputData, + requestContext: context?.requestContext, + tracingContext, + metadata: { + 'tool.id': 'fetch', + 'tool.input.url': inputData.url, + 'tool.input.query': inputData.query, + 'tool.input.searchProvider': inputData.searchProvider, + 'tool.input.searchVertical': inputData.searchVertical, + 'tool.input.contentContext': + typeof inputData.contentContext === 'object' + ? 
JSON.stringify(inputData.contentContext) + : undefined, + 'user.id': requestContext?.userId, + 'workspace.id': requestContext?.workspaceId, + }, + }) + + const timeout = inputData.timeout ?? requestContext?.timeout ?? 30000 + const userAgent = inputData.userAgent ?? requestContext?.userAgent + const contentWindow = resolveContentWindow(inputData.contentContext) + const includePatterns = compileRe2Patterns(inputData.includeUrlPatterns) + const excludePatterns = compileRe2Patterns(inputData.excludeUrlPatterns) + + try { + if (typeof inputData.url === 'string' && inputData.url.trim() !== '') { + const page = await fetchPageAsMarkdown({ + url: inputData.url, + timeout, + userAgent, + contentWindow, + }) + + const result = fetchToolOutputSchema.parse({ + mode: 'url', + url: inputData.url, + markdown: page.markdown, + metadata: { + fetchedAt: new Date().toISOString(), + source: 'direct-url', + originalChars: page.originalChars, + outputChars: page.outputChars, + truncated: page.truncated, + contentWindow, + }, + }) + + span?.update({ output: result }) + span?.end() + + await writer?.custom({ + type: 'data-tool-progress', + data: { + status: 'done', + message: 'โœ… Fetch complete', + stage: 'fetch', + }, + id: 'fetch', + }) + + return result + } + + const query = inputData.query?.trim() ?? '' + const maxResults = inputData.maxResults ?? 5 + const includeContent = inputData.includeContent ?? true + const provider: SearchProvider = inputData.searchProvider ?? 'all' + const verticalInput: SearchVertical = inputData.searchVertical ?? 'auto' + const effectiveVertical: 'news' | 'web' = + verticalInput === 'news' + ? 'news' + : verticalInput === 'web' + ? 'web' + : isNewsQuery(query) + ? 
'news' + : 'web' + + const discoveredRaw: SearchResult[] = [] + const providerDiagnostics: Record = {} + + if (effectiveVertical === 'news') { + try { + const newsResults = await searchGoogleNewsRss({ + query, + timeout, + userAgent, + maxResults, + }) + providerDiagnostics['google-news-rss'] = 'ok' + discoveredRaw.push(...newsResults) + } catch (error) { + providerDiagnostics['google-news-rss'] = + error instanceof Error ? error.message : String(error) + } + } + + if (provider === 'duckduckgo') { + const providerResults = await searchDuckDuckGo({ + query, + timeout, + userAgent, + }) + discoveredRaw.push(...providerResults) + providerDiagnostics.duckduckgo = 'ok' + } else if (provider === 'google') { + const providerResults = await searchGoogle({ + query, + maxResults, + timeout, + userAgent, + }) + discoveredRaw.push(...providerResults) + providerDiagnostics.google = 'ok' + } else if (provider === 'bing') { + const providerResults = await searchBing({ + query, + timeout, + userAgent, + }) + discoveredRaw.push(...providerResults) + providerDiagnostics.bing = 'ok' + } else { + const [ddgResults, googleResults, bingResults] = await Promise.all([ + searchDuckDuckGo({ query, timeout, userAgent }) + .then((results) => { + providerDiagnostics.duckduckgo = 'ok' + return results + }) + .catch((error) => { + providerDiagnostics.duckduckgo = + error instanceof Error ? error.message : String(error) + return [] + }), + searchGoogle({ + query, + maxResults, + timeout, + userAgent, + }) + .then((results) => { + providerDiagnostics.google = 'ok' + return results + }) + .catch((error) => { + providerDiagnostics.google = + error instanceof Error ? error.message : String(error) + return [] + }), + searchBing({ query, timeout, userAgent }) + .then((results) => { + providerDiagnostics.bing = 'ok' + return results + }) + .catch((error) => { + providerDiagnostics.bing = + error instanceof Error ? 
error.message : String(error) + return [] + }), + ]) + discoveredRaw.push(...ddgResults, ...googleResults, ...bingResults) + } + + const discovered = dedupeResults(discoveredRaw) + .filter((r) => passesRe2Filters(r.url, includePatterns, excludePatterns)) + .slice(0, maxResults) + + const results: Array<{ + title?: string + url: string + snippet?: string + markdown?: string + }> = [] + + for (const item of discovered) { + if (abortSignal?.aborted ?? false) { + throw new Error('Fetch tool cancelled during result fetch') + } + + if (!includeContent) { + results.push(item) + continue + } + + try { + const resultMarkdown = await fetchPageAsMarkdown({ + url: item.url, + timeout, + userAgent, + contentWindow, + }) + results.push({ ...item, markdown: resultMarkdown.markdown }) + } catch (error) { + log.warn('Result content fetch failed; returning search metadata only', { + url: item.url, + error: error instanceof Error ? error.message : String(error), + }) + results.push(item) + } + } + + const markdown = [ + `# Search results for: ${query}`, + '', + ...results.flatMap((r) => { + const sectionTitle = + typeof r.title === 'string' && r.title.trim() !== '' + ? 
r.title + : r.url + const lines = [`## ${sectionTitle}`, `- URL: ${r.url}`] + if (typeof r.snippet === 'string' && r.snippet.trim() !== '') { + lines.push(`- Snippet: ${r.snippet}`) + } + if (typeof r.markdown === 'string' && r.markdown.trim() !== '') { + lines.push('', r.markdown) + } + lines.push('') + return lines + }), + ] + .join('\n') + .trim() + + const result = fetchToolOutputSchema.parse({ + mode: 'search', + query, + markdown, + results, + metadata: { + fetchedAt: new Date().toISOString(), + source: provider, + provider, + vertical: effectiveVertical, + totalResults: results.length, + contentWindow, + providerDiagnostics, + }, + }) + + span?.update({ output: result }) + span?.end() + + await writer?.custom({ + type: 'data-tool-progress', + data: { + status: 'done', + message: `โœ… Search complete: ${results.length} result(s)`, + stage: 'fetch', + }, + id: 'fetch', + }) + + return result + } catch (error) { + span?.error({ + error: error instanceof Error ? error : new Error(String(error)), + endSpan: true, + }) + throw error + } + }, + onOutput: ({ output, toolCallId, toolName, abortSignal }) => { + log.info('Fetch tool completed', { + toolCallId, + toolName, + abortSignal: abortSignal?.aborted, + mode: output.mode, + resultCount: output.results?.length ?? 
0, + hook: 'onOutput', + }) + }, +}) + +export type FetchUITool = InferUITool diff --git a/src/mastra/tools/index.ts b/src/mastra/tools/index.ts index 6c5d37b5..1bdaf780 100644 --- a/src/mastra/tools/index.ts +++ b/src/mastra/tools/index.ts @@ -28,6 +28,7 @@ export * from './editor-agent-tool' export * from './evaluateResultTool' export * from './execa-tool' export * from './extractLearningsTool' +export * from './fetch.tool' export * from './financial-chart-tools' export * from './finnhub-tools' export * from './fs' @@ -70,6 +71,7 @@ export { contentCleanerTool } from './web-scraper-tool' export { apiDataFetcherTool } from './web-scraper-tool' export { scrapingSchedulerTool } from './web-scraper-tool' export { dataExporterTool } from './web-scraper-tool' +export { fetchTool } from './fetch.tool' export { weatherTool } from './weather-tool' export { documentRerankerTool } from './document-chunking.tool' export { mastraChunker } from './document-chunking.tool'