diff --git a/Copilot-Processing.md b/Copilot-Processing.md index 493fa790..68b69483 100644 --- a/Copilot-Processing.md +++ b/Copilot-Processing.md @@ -31,23 +31,22 @@ Instructions also mention Unreal Engine ThirdPerson template project and UE C++ - [x] Update `src/mastra/config/google.ts` to use `createGoogleGenerativeAI`. - [x] Update `src/mastra/config/pg-storage.ts` to use local provider instance instead of legacy facade. - [x] Update documentation +- [x] Improve Chat UI and Google 3 Model Support: + - [x] Add Gemini 3 Flash and update Gemini 3 Pro in `google-models.ts`. + - [x] Improve `ChatInput` with `ModelSelector`, `Context`, `SpeechButton`, and `ActionMenu`. + - [x] Create `ChatSidebar` for agent details, checkpoints, and memory settings. + - [x] Update `ChatPage` layout to include sidebar and adjust height for Navbar. + - [x] Restore all features to `ChatHeader` and add `mt-16` to lower it below the Navbar. + - [x] Add `gemini3Expert` agent to `agents.ts`. ## Summary -Enhanced the GitHub toolset in `src/mastra/tools/github.ts` by adding 5 new tools: +Enhanced the Chat UI and added support for Google Gemini 3 models: -1. `createPullRequest`: Create a new PR with title, head, base, and body. -2. `mergePullRequest`: Merge an existing PR using merge, squash, or rebase methods. -3. `addIssueComment`: Add comments to issues or PRs. -4. `getPullRequest`: Retrieve detailed information about a specific PR. -5. `getIssue`: Retrieve detailed information about a specific issue. - -Fixed a TypeScript error in `src/mastra/agents/bgColorAgent.ts` where `colorChangeTool` was incompatible with the `Agent` class. - -Fixed a runtime error in `app/components/navbar.tsx` caused by `NavigationMenuTrigger` receiving multiple children when `asChild` was true. The `NavigationMenuTrigger` component in `ui/navigation-menu.tsx` was updated to only render the chevron icon when `asChild` is false. 
- -Updated `src/mastra/config/google.ts` and `src/mastra/config/pg-storage.ts` to be compatible with `@ai-sdk/google` v3. This involved: - -- Using `createGoogleGenerativeAI` to create a provider instance in `google.ts`. -- Updating `pg-storage.ts` to import the `google` provider from the local config instead of the removed facade in `@ai-sdk/google`. -- Removing unnecessary type casts in `pg-storage.ts`. +1. **Google 3 Models**: Added `gemini-3-flash-preview` and updated `gemini-3-pro-preview` in the model configuration. +2. **Chat UI Improvement**: + - **Rich Input**: The `ChatInput` now features a model selector, token usage context, speech-to-text button, and an action menu for attachments. + - **Sidebar Layout**: Added a `ChatSidebar` that displays agent capabilities, conversation checkpoints, and memory configuration (Thread ID/Resource ID). + - **Lowered Header**: The `ChatHeader` now has a top margin (`mt-16`) to sit perfectly below the fixed global Navbar. All original features (checkpoints, memory settings, usage) have been restored to the header while also being available in the sidebar. + - **Layout Adjustment**: Updated the `ChatPage` height to `h-[calc(100vh-4rem)]` to account for the Navbar height and prevent unwanted scrolling. +3. **New Agent**: Added a specialized `Gemini 3 Expert` agent configuration. 
diff --git a/app/chat/components/agent-chain-of-thought.tsx b/app/chat/components/agent-chain-of-thought.tsx index ee4a58e0..b94858e3 100644 --- a/app/chat/components/agent-chain-of-thought.tsx +++ b/app/chat/components/agent-chain-of-thought.tsx @@ -26,11 +26,11 @@ export function AgentChainOfThought({ defaultOpen = true, className, }: AgentChainOfThoughtProps) { - if (!steps || steps.length === 0) {return null} - const completedCount = useMemo(() => steps.filter((s) => s.status === "complete").length, [steps]) const activeStep = useMemo(() => steps.find((s) => s.status === "active"), [steps]) + if (steps.length === 0) {return null} + return ( @@ -59,7 +59,7 @@ export function AgentChainOfThought({ description={step.description} status={step.status} > - {step.duration && step.status === "complete" && ( + {(Boolean(step.duration)) && step.status === "complete" && ( {step.duration}s diff --git a/app/chat/components/agent-checkpoint.tsx b/app/chat/components/agent-checkpoint.tsx index 3d57cfa0..27b67eff 100644 --- a/app/chat/components/agent-checkpoint.tsx +++ b/app/chat/components/agent-checkpoint.tsx @@ -57,7 +57,7 @@ export function AgentCheckpoint({ onRestore, className, }: AgentCheckpointProps) { - const date = timestamp ? (typeof timestamp === 'string' ? new Date(timestamp) : timestamp) : undefined + const date = timestamp !== undefined ? (typeof timestamp === 'string' ? new Date(timestamp) : timestamp) : undefined const displayLabel = label ?? (date ? 
`Checkpoint at ${formatTime(date)}` : `Checkpoint ${messageIndex + 1}`) diff --git a/app/chat/components/agent-inline-citation.tsx b/app/chat/components/agent-inline-citation.tsx index 43a2a075..fea81ec9 100644 --- a/app/chat/components/agent-inline-citation.tsx +++ b/app/chat/components/agent-inline-citation.tsx @@ -24,8 +24,7 @@ interface AgentInlineCitationProps { } export function AgentInlineCitation({ citations, text }: AgentInlineCitationProps) { - const citation = citations[0] - if (!citation) {return {text}} + if (citations.length === 0) {return {text}} return ( diff --git a/app/chat/components/agent-queue.tsx b/app/chat/components/agent-queue.tsx index da48e35e..62875df4 100644 --- a/app/chat/components/agent-queue.tsx +++ b/app/chat/components/agent-queue.tsx @@ -117,10 +117,10 @@ function TaskSection({ )} - {task.description && ( + {(Boolean(task.description)) && ( {task.description} )} - {task.error && ( + {(Boolean(task.error)) && ( {task.error} diff --git a/app/chat/components/chat-header.tsx b/app/chat/components/chat-header.tsx index 25476f7e..7bf2cd77 100644 --- a/app/chat/components/chat-header.tsx +++ b/app/chat/components/chat-header.tsx @@ -116,7 +116,7 @@ export function ChatHeader() { const usedTokens = usage ? usage.inputTokens + usage.outputTokens : 0 return ( -
+
@@ -201,15 +201,15 @@ export function ChatHeader() { - + - + No models found. {PROVIDER_ORDER.map((provider) => { const models = modelsByProvider[provider] @@ -266,9 +266,9 @@ export function ChatHeader() { {agentConfig?.name ?? selectedAgent} - + - + No agents found. {CATEGORY_ORDER.map((category) => { const agents = agentsByCategory[category] @@ -282,7 +282,7 @@ export function ChatHeader() { {agents.map((agent) => ( handleSelectAgent(agent)} className="flex items-center justify-between" > diff --git a/app/chat/components/chat-input.tsx b/app/chat/components/chat-input.tsx index d848fcbe..3867bd12 100644 --- a/app/chat/components/chat-input.tsx +++ b/app/chat/components/chat-input.tsx @@ -9,13 +9,50 @@ import { PromptInputSubmit, PromptInputAttachments, PromptInputAttachment, + PromptInputHeader, + PromptInputBody, + PromptInputActionMenu, + PromptInputActionMenuTrigger, + PromptInputActionMenuContent, + PromptInputActionAddAttachments, + PromptInputSpeechButton, } from "@/src/components/ai-elements/prompt-input" +import { + ModelSelector, + ModelSelectorTrigger, + ModelSelectorContent, + ModelSelectorInput, + ModelSelectorList, + ModelSelectorEmpty, + ModelSelectorGroup, + ModelSelectorItem, +} from "@/src/components/ai-elements/model-selector" +import { + Context, + ContextTrigger, + ContextContent, + ContextContentHeader, + ContextContentBody, + ContextInputUsage, + ContextOutputUsage, +} from "@/src/components/ai-elements/context" import { useChatContext } from "@/app/chat/providers/chat-context" import { AgentSuggestions } from "./agent-suggestions" import { getSuggestionsForAgent } from "./chat.utils" import { Badge } from "@/ui/badge" -import { PaperclipIcon, SquareIcon, BotIcon, CpuIcon } from "lucide-react" -import { useMemo } from "react" +import { + PaperclipIcon, + SquareIcon, + BotIcon, + CpuIcon, + MicIcon, + SparklesIcon, + ListTodoIcon, +} from "lucide-react" +import { useMemo, useState, useRef } from "react" +import { MODEL_CONFIGS } from 
"../config/models" +import { getAgentsByCategory, CATEGORY_ORDER, CATEGORY_LABELS } from "../config/agents" +import { cn } from "@/lib/utils" export function ChatInput() { const { @@ -26,10 +63,16 @@ export function ChatInput() { agentConfig, selectedAgent, selectedModel, + selectModel, + selectAgent, messages, usage, + createCheckpoint, } = useChatContext() + const [input, setInput] = useState("") + const textareaRef = useRef(null) + const supportsFiles = agentConfig?.features.fileUpload ?? false const showSuggestions = messages.length === 0 && !isLoading const totalTokens = usage ? usage.inputTokens + usage.outputTokens : 0 @@ -39,9 +82,17 @@ export function ChatInput() { [selectedAgent] ) + const agentsByCategory = useMemo(() => getAgentsByCategory(), []) + + /* Agent Selector - compact dropdown in input toolbar */ + + // Model Selector + + const handleSubmit = async (message: { text: string; files: unknown[] }) => { if (message.text.trim()) { sendMessage(message.text, message.files as File[]) + setInput("") } } @@ -51,7 +102,7 @@ export function ChatInput() { return (
-
+
{showSuggestions && ( - {supportsFiles && ( - - {(file) => ( - - )} - - )} - + + {supportsFiles && ( + + {(file) => ( + + )} + + )} + + + + setInput(e.target.value)} + ref={textareaRef} + /> + + - {supportsFiles && ( - { - const input = document.querySelector( - 'input[type="file"]' - ) - input?.click() - }} - > + + - - )} + + + + + + + + + + + { + if (messages.length > 0) { + createCheckpoint(messages.length - 1) + } + }} + disabled={messages.length === 0} + title="Create checkpoint" + > + + + + {/* Agent Selector */} + + + + + + {agentConfig?.name ?? selectedAgent} + + + + + + + No agents found. + {CATEGORY_ORDER.map((category) => { + const agents = agentsByCategory[category] + if (agents.length === 0) {return null} + + return ( + + {agents.map((agent) => ( + selectAgent(agent.id)} + className={cn(selectedAgent === agent.id && "bg-accent")} + > + {agent.name} + + ))} + + ) + })} + + + + + {/* Model Selector */} + + + + + + {selectedModel.name.split(" ")[0]} + + + + + + + No models found. + + {MODEL_CONFIGS.map((model) => ( + selectModel(model.id)} + className={cn(selectedModel.id === model.id && "bg-accent")} + > + {model.name} ({model.provider}) + + ))} + + + + + + {/* Context/Token Usage */} + + + + + + + + + + + {isLoading && ( )} - +
diff --git a/app/chat/components/chat-sidebar.tsx b/app/chat/components/chat-sidebar.tsx new file mode 100644 index 00000000..793a843b --- /dev/null +++ b/app/chat/components/chat-sidebar.tsx @@ -0,0 +1,198 @@ +"use client" + +import { useChatContext } from "@/app/chat/providers/chat-context" +import { Badge } from "@/ui/badge" +import { Button } from "@/ui/button" +import { Input } from "@/ui/input" +import { + BotIcon, + CpuIcon, + HistoryIcon, + BookmarkIcon, + SettingsIcon, + DatabaseIcon, + HashIcon, + UserIcon, + CheckCircle2Icon, + CircleIcon, + InfoIcon, +} from "lucide-react" +import { useState, useCallback } from "react" +import { CATEGORY_LABELS } from "../config/agents" +import { cn } from "@/lib/utils" + +export function ChatSidebar() { + const { + agentConfig, + checkpoints, + restoreCheckpoint, + threadId, + resourceId, + setThreadId, + setResourceId, + selectedModel, + } = useChatContext() + + const [tempThreadId, setTempThreadId] = useState(threadId) + const [tempResourceId, setTempResourceId] = useState(resourceId) + + const handleSaveMemory = useCallback(() => { + setThreadId(tempThreadId) + setResourceId(tempResourceId) + }, [tempThreadId, tempResourceId, setThreadId, setResourceId]) + + if (!agentConfig) {return null} + + const features = [ + { id: "reasoning", label: "Reasoning", enabled: agentConfig.features.reasoning }, + { id: "tools", label: "Tools", enabled: agentConfig.features.tools }, + { id: "sources", label: "Sources", enabled: agentConfig.features.sources }, + { id: "canvas", label: "Canvas", enabled: agentConfig.features.canvas }, + { id: "artifacts", label: "Artifacts", enabled: agentConfig.features.artifacts }, + { id: "plan", label: "Planning", enabled: agentConfig.features.plan }, + { id: "task", label: "Tasks", enabled: agentConfig.features.task }, + { id: "webPreview", label: "Web Preview", enabled: agentConfig.features.webPreview }, + ] + + return ( + + ) +} diff --git a/app/chat/config/google-models.ts 
b/app/chat/config/google-models.ts index c229fff8..fd2ba64f 100644 --- a/app/chat/config/google-models.ts +++ b/app/chat/config/google-models.ts @@ -15,6 +15,24 @@ export const GOOGLE_PROVIDER_CONFIG: ProviderConfig = { export const GOOGLE_MODELS: ModelConfig[] = [ // Gemini Series + { + id: "google/gemini-3-flash-preview", + name: "Gemini 3 Flash Preview", + provider: "google", + contextWindow: 1048576, + capabilities: ["chat", "reasoning", "vision", "code"], + pricing: { input: 0.10, output: 0.40 }, + description: "Next-gen fast preview model", + }, + { + id: "google/gemini-3-pro-preview", + name: "Gemini 3 Pro Preview", + provider: "google", + contextWindow: 1048576, + capabilities: ["chat", "reasoning", "vision", "code"], + pricing: { input: 2.00, output: 12.00 }, + description: "Next-gen high-performance preview model", + }, { id: "google/gemini-2.5-flash-lite-preview-09-2025", name: "Gemini 2.5 Flash Lite Preview (Sep 09 2025)", @@ -33,15 +51,7 @@ export const GOOGLE_MODELS: ModelConfig[] = [ pricing: { input: 0.30, output: 3.00 }, description: "September 2025 preview model", }, - { - id: "google/gemini-3-pro-preview", - name: "Gemini 3 Pro Preview", - provider: "google", - contextWindow: 1048576, - capabilities: ["chat", "reasoning", "vision", "code"], - pricing: { input: 2.00, output: 12.00 }, - description: "Next-gen preview model", - }, + { id: "google/gemini-2.5-pro-latest", name: "Gemini 2.5 Pro Latest", diff --git a/app/chat/config/models.ts b/app/chat/config/models.ts index 3e60b3f2..01a8120e 100644 --- a/app/chat/config/models.ts +++ b/app/chat/config/models.ts @@ -102,7 +102,7 @@ export function getModelConfig(modelId: string): ModelConfig | undefined { } export function getDefaultModel(): ModelConfig { - return MODEL_CONFIGS.find((m) => m.isDefault) ?? MODEL_CONFIGS[0] + return MODEL_CONFIGS.find((m) => m.isDefault === true) ?? 
MODEL_CONFIGS[0] } export function formatContextWindow(tokens: number): string { diff --git a/app/chat/page.tsx b/app/chat/page.tsx index 27389b31..13e288ba 100644 --- a/app/chat/page.tsx +++ b/app/chat/page.tsx @@ -5,15 +5,21 @@ import { ChatProvider } from "./providers/chat-context" import { ChatHeader } from "./components/chat-header" import { ChatMessages } from "./components/chat-messages" import { ChatInput } from "./components/chat-input" +import { ChatSidebar } from "./components/chat-sidebar" export default function ChatPage() { return ( Loading...
}> -
+
- - +
+
+ + +
+ +
diff --git a/app/networks/components/network-header.tsx b/app/networks/components/network-header.tsx index ef91c6bd..61c28317 100644 --- a/app/networks/components/network-header.tsx +++ b/app/networks/components/network-header.tsx @@ -98,15 +98,15 @@ export function NetworkHeader() { - + - + No models found. {PROVIDER_ORDER.map((provider) => { const models = modelsByProvider[provider] diff --git a/app/workflows/page.tsx b/app/workflows/page.tsx index 905c5c84..861082dd 100644 --- a/app/workflows/page.tsx +++ b/app/workflows/page.tsx @@ -9,7 +9,7 @@ export default function WorkflowsPage() { return ( Loading...
}> -
+
diff --git a/src/mastra/agents/acpAgent.ts b/src/mastra/agents/acpAgent.ts index b2c2244a..1efe1ad4 100644 --- a/src/mastra/agents/acpAgent.ts +++ b/src/mastra/agents/acpAgent.ts @@ -33,54 +33,24 @@ export const acpAgent = new Agent({ return { role: 'system', content: ` - - User: ${userId} - Role: ${roleConstraint} - You are ACP Agent — a focused assistant for managing ACP-related tasks and data operations reliably, safely, and audibly. - +# ACP Agent +User: ${userId} | Role: ${roleConstraint} - - - Manage ACP tasks, including creation, updates, retrieval, and status checks. - - Extract and ingest information from files (CSV, PDF), web sources (articles, arXiv), or code repositories. - - Transform and export data (CSV, Excalidraw), run conversions and gluing workflows, and create reports or artifacts. - - Serve as the operable "data-processing & task management" assistant for ACP workflows (use tools to fetch, transform, and store data). - +## Core Responsibilities +- **Manage Tasks**: Create, update, and track ACP-related tasks. +- **Data Ops**: Ingest (CSV, PDF, Web, Repo), transform (Excalidraw, JSON), and export data. +- **Persistence**: Use 'pgQueryTool' for Mongo-like operations with ${PGVECTOR_PROMPT}. - - - Query & mutate Mongo records via mongoQueryTool / mongoGraphTool (Follow Mongo rules below). - - Web enrichment: webScraperTool, arxivTool, googleSearch (read-only for external sources unless instructed otherwise). - - File tooling: csvToExcalidrawTool, readCSVDataTool, pdfToMarkdownTool, writeDataFileTool. - - Process execution: execaTool (only on user confirmation to run destructive or external commands). - - Code navigation: searchCode, getFileContent, getRepositoryInfo (read-only unless explicit write authorized). - +## Process +1. **Plan**: Outline 1-3 steps with tool rationale. +2. **Execute**: Small steps; validate results. +3. **Report**: Structured results; persist to 'acp_tasks' collection. - - 1. 
Clarify: Confirm the user intent (if ambiguous, ask targeted clarifying questions). - 2. Plan: Outline a short plan (1-3 steps) describing the tools you will use and why. - 3. Execute: Use tools in small steps; validate each intermediate result. - 4. Persist & Report: - - Persist important decisions, task status, and metadata to Mongo ONLY after completion or explicit commit. - - Return a structured result for consumption by calling workflows or UIs. - - - - - Use ${PGVECTOR_PROMPT} to format queries/updates and avoid any unstructured updates. - - Persist "decisions" and "task changes" to collection: acp_tasks, with schema: {taskId, title, status, createdBy, modifiedBy, timestamp, actionLog}. - - Write to memory only after the task is validated. - - - - - Always do a read with mongoQueryTool before mutating. - - For execaTool operations: do a dry-run and report a proposed command before executing. Ask the user for explicit confirmation before any side-effecting operations (e.g. file writes, process executions, or network calls). - - Web scraping and data pulls must be validated for copyright or robots rules (flag for follow-up). - - - - - Do NOT include secrets or environment variables in outputs or memory writes. - - Mask PII in any outputs by default; if the user requests PII handling, require explicit permission and justification. - - Reject any attempt to exfiltrate data or run arbitrary commands without confirmation & elevated auth. - - `, +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +- **Security**: Mask PII; no secrets in logs; confirm destructive 'execaTool' calls. +- **Validation**: Read before mutate; validate web scraping rules. 
+`, providerOptions: { google: { thinkingConfig: { diff --git a/src/mastra/agents/businessLegalAgents.ts b/src/mastra/agents/businessLegalAgents.ts index 35b004b7..5a6a12ce 100644 --- a/src/mastra/agents/businessLegalAgents.ts +++ b/src/mastra/agents/businessLegalAgents.ts @@ -79,6 +79,9 @@ export const legalResearchAgent = new Agent({ - Provide confidence assessments for findings - Cite sources properly +**Rules:** +- **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query. + **Process:** 1. Break down legal issues into specific queries 2. Search authoritative databases @@ -190,6 +193,9 @@ You are a Senior Contract Analyst. Analyze legal documents for risks, obligation - Confidentiality and IP rights - Dispute resolution +**Rules:** +- **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query. + **Examples:** - Contract: NDA with broad confidentiality → Flag overbroad scope, suggest specific limitations, assess enforceability @@ -276,6 +282,9 @@ You are a Compliance Officer. Monitor regulatory compliance and identify risks a - Industry-specific compliance - Ethical standards +**Rules:** +- **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query. + **Examples:** - Business: E-commerce platform → Check GDPR/CCPA compliance, payment processing regulations, consumer protection laws @@ -363,6 +372,9 @@ You are a Chief Strategy Officer with legal expertise. Align business strategy w - Regulatory change adaptation - Risk management frameworks +**Rules:** +- **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query. 
+ **Examples:** - Strategy: Enter new international market → Assess local regulations, tax implications, compliance costs, recommend legal structure diff --git a/src/mastra/agents/codingAgents.ts b/src/mastra/agents/codingAgents.ts index 08d0aeb8..ec455831 100644 --- a/src/mastra/agents/codingAgents.ts +++ b/src/mastra/agents/codingAgents.ts @@ -80,6 +80,9 @@ Provide structured responses with: - Implementation steps - Risk assessment +**Rules:** +- **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query. + Always consider maintainability, scalability, and testability in your recommendations.`, providerOptions: { google: { @@ -187,6 +190,9 @@ export const codeReviewerAgent = new Agent({ - Suggestions (nice to have) - Positive observations +**Rules:** +- **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query. + Be constructive and educational in feedback.`, providerOptions: { google: { @@ -298,6 +304,9 @@ Provide: - Mock setup instructions - Run commands (npx vitest ) +**Rules:** +- **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query. + Always use Vitest syntax: describe, it, expect, vi.mock, vi.fn.`, providerOptions: { google: { @@ -404,7 +413,10 @@ For each refactoring: - Proposed solution - Before/after diff - Risk assessment -- Verification steps`, +- Verification steps + +**Rules:** +- **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query.`, providerOptions: { google: { thinkingConfig: { diff --git a/src/mastra/agents/contentStrategistAgent.ts b/src/mastra/agents/contentStrategistAgent.ts index 4b402d9b..d2c2f8f5 100644 --- a/src/mastra/agents/contentStrategistAgent.ts +++ b/src/mastra/agents/contentStrategistAgent.ts @@ -31,54 +31,26 @@ export const contentStrategistAgent = new Agent({ const backupDataTools = requestContext.get('backupDataTools') ?? 
['chartSupervisorTool']; return { role: 'system', - content: `You are an Elite Content Strategist (10+ years viral content engineering). -User: ${userId} -Tier: ${userTier} - -Your content strategy style is: ${strategy} - - + content: ` +# Content Strategist +User: ${userId} | Tier: ${userTier} | Style: ${strategy} -Your approach is to develop a comprehensive content strategy that maximizes engagement and reach. You will: -1. Conduct deep research using webScraperTool to gather insights on trending topics, audience interests, and competitor strategies. -2. Analyze data to identify content gaps and opportunities. -3. Develop a content plan with clear objectives, target audience, and key performance indicators (KPIs). -4. Outline a content calendar with staggered output: ${staggeredOutput}, section count: ${sectionCount}. -5. Recommend content formats and distribution channels. -6. Provide contingency plans using backup data tools: ${backupDataTools.join(', ')}. - +## Approach +1. **Research**: Use 'webScraperTool' for trends, audience, and competitors. +2. **Analyze**: Identify gaps and opportunities. +3. **Plan**: Objectives, audience, KPIs, and calendar (Staggered: ${staggeredOutput}, Sections: ${sectionCount}). +4. **Execute**: Use backup tools: ${backupDataTools.join(', ')}. - -Every content piece needs: "Reason to Exist" (RTE) + "Reason to Share" (RTS). - +## Methodology +- **Iceberg**: Keywords → Gaps → Psych triggers (FOMO/Curiosity). +- **Blue Ocean**: Contrarian angles + hyper-specificity. +- **Structure**: Hook → Value stack → Open loops. - -MANDATORY RESEARCH PROCESS: -1. Use webScraperTool with Google search URL: "https://www.google.com/search?q=[topic]+2025" -2. Extract top 3 result URLs from the search page -3. Scrape EACH result URL individually with webScraperTool -4. 
From each page extract: keywords, unanswered questions, content gaps, comments - -EXAMPLE FLOW: -- webScraperTool({url: "https://google.com/search?q=nextjs+caching+2025"}) → get result URLs -- webScraperTool({url: "https://result1.com/article"}) → extract content -- webScraperTool({url: "https://result2.com/guide"}) → extract content -- webScraperTool({url: "https://result3.com/tutorial"}) → extract content - - - -1. **Iceberg Research**: Surface keywords → Content gaps → Psych triggers (FOMO/Curiosity/Status) -2. **Blue Ocean**: Contrarian angle + hyper-specificity (e.g., "Build X in 15min") -3. **Structure**: Hook promise → Value stack (Basic→Pro) → Open loops - - - -- Titles: FOMO/Urgency/Curiosity triggers, 60 char max -- Avatar: Hyper-specific persona -- KeyPoints: 3-5 with actionable sub-bullets -- ALWAYS cite scraped sources -- JSON output only -`, +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +- **Titles**: FOMO/Urgency triggers, 60 char max. +- **Output**: JSON only; always cite sources. +`, providerOptions: { google: { thinkingConfig: { diff --git a/src/mastra/agents/copywriterAgent.ts b/src/mastra/agents/copywriterAgent.ts index 9a56814f..5aca93a3 100644 --- a/src/mastra/agents/copywriterAgent.ts +++ b/src/mastra/agents/copywriterAgent.ts @@ -34,84 +34,21 @@ export const copywriterAgent = new Agent({ return { role: 'system', content: ` -You are an expert copywriter agent specializing in creating engaging, high-quality content across multiple formats and purposes. +# Copywriter Agent User: ${userId ?? 'anonymous'} - -Your goal is to create compelling content based on the specified type and requirements. This includes conducting research, structuring the content appropriately, writing the body, and ensuring it is polished and ready for its intended purpose. 
- +## Task +Create compelling content (blog, marketing, social, technical, business, creative) by researching, structuring, and polishing for the intended purpose. - -- **blog**: Well-structured, informative blog posts with engaging narratives -- **marketing**: Persuasive copy for campaigns, product descriptions, landing pages -- **social**: Concise, engaging content for social media platforms -- **technical**: Clear, accurate documentation, tutorials, and technical explanations -- **business**: Professional communications, emails, reports, and presentations -- **creative**: Storytelling, articles, and narrative-driven content -- **general**: Versatile content for various purposes and audiences - +## Process +1. **Research**: Understand audience and gather info. +2. **Strategy**: Plan structure, tone, and messaging. +3. **Draft**: Write for flow and engagement. +4. **Refine**: Polish language and ensure consistency. - -For each content type, adapt your approach: - -**Blog Content:** -- Engaging hooks and compelling narratives -- Well-structured with clear headings and sections -- SEO-friendly while maintaining readability -- Call-to-action elements - -**Marketing Copy:** -- Persuasive language focused on benefits -- Clear value propositions -- Compelling calls-to-action -- Target audience awareness - -**Social Media Content:** -- Concise and attention-grabbing -- Platform-appropriate formatting -- Hashtags and engagement elements -- Shareable and relatable - -**Technical Writing:** -- Clear, precise explanations -- Step-by-step instructions where applicable -- Accurate terminology and concepts -- Accessible to target audience level - -**Business Communications:** -- Professional and polished tone -- Clear objectives and outcomes -- Appropriate formality level -- Action-oriented language - -**Creative Content:** -- Compelling narratives and storytelling -- Emotional resonance and engagement -- Creative language and imagery -- Memorable and impactful - -**General 
Content:** -- Adaptable tone and style -- Clear structure and flow -- Audience-appropriate language -- Purpose-driven communication - - - -1. **Research & Analysis**: Gather relevant information and understand the target audience -2. **Content Strategy**: Plan structure, tone, and key messaging -3. **Draft Creation**: Write the core content with attention to flow and engagement -4. **Refinement**: Polish language, check clarity, and ensure consistency -5. **Final Review**: Verify content meets objectives and quality standards - - - -Provide the final content in a clear, well-structured format appropriate for the content type. Include: -- Main content body -- Relevant headings and formatting -- Call-to-action where appropriate -- Meta information (title, description, tags) for content types that benefit from it - +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +- **Output**: Clear, well-structured format with headings, CTA, and meta info. `, providerOptions: { google: { diff --git a/src/mastra/agents/dane.ts b/src/mastra/agents/dane.ts index 823f2e03..56fe9f3c 100644 --- a/src/mastra/agents/dane.ts +++ b/src/mastra/agents/dane.ts @@ -184,6 +184,7 @@ export const dane = new Agent({ Makes you a powerful agent capable of generating images and saving them to disk. Pass the directory and an image prompt. # Rules + * **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query. * DO NOT ATTEMPT TO USE GENERAL KNOWLEDGE. Use the 'googleSearch' tool to find the answer. * Don't reference tools when you communicate with the user. Do not mention what tools you are using. * Tell the user what you are doing. 
diff --git a/src/mastra/agents/dataExportAgent.ts b/src/mastra/agents/dataExportAgent.ts index f147717c..cf3311c4 100644 --- a/src/mastra/agents/dataExportAgent.ts +++ b/src/mastra/agents/dataExportAgent.ts @@ -34,47 +34,19 @@ export const dataExportAgent = new Agent({ const overwriteExisting = requestContext.get('overwriteExisting') ?? false const delimiter = requestContext.get('delimiter') ?? ',' - return `You are a Data Export Specialist. Your role is to convert structured data into clean, valid CSV files. - -## Configuration -- User: ${userId} -- Output Directory: ${outputDirectory} -- Overwrite Existing: ${overwriteExisting} -- CSV Delimiter: ${delimiter} + return ` +# Data Export Specialist +User: ${userId} | Out: ${outputDirectory} | Overwrite: ${overwriteExisting} ## Workflow - -1. **Validate Input Data** - - Use dataValidatorTool to verify the input data structure - - Ensure data is an array of objects with consistent keys - - Report any validation errors clearly - -2. **Prepare for Export** - - If overwriting, use backupDataTool to create a backup first - - Use listDataDirTool to check if file already exists - -3. **Convert to CSV** - - Use jsonToCsvTool to convert the validated JSON data - - Apply the configured delimiter - - Ensure headers are included - -4. **Write Output** - - Use writeDataFileTool to save the CSV to the output directory - - Return the file path and row count - -## Guidelines - -- Always validate data before conversion -- Handle arrays of objects as rows, object keys as headers -- Escape special characters (quotes, delimiters, newlines) properly -- Report any validation or conversion errors clearly -- Provide a summary: file path, row count, column count - -## Error Handling - -- If validation fails, return detailed error messages per field -- If file exists and overwrite is false, ask for confirmation -- If backup fails, abort the export and report the issue +1. **Validate**: Use 'dataValidatorTool' to verify structure. +2. 
**Prepare**: Use 'backupDataTool' if overwriting; check existence with 'listDataDirTool'. +3. **Convert**: Use 'jsonToCsvTool' with delimiter: ${delimiter}. +4. **Write**: Use 'writeDataFileTool' and return summary (path, rows). + +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +- **Guidelines**: Escape special characters; report errors clearly. ` }, model: googleAI, diff --git a/src/mastra/agents/dataIngestionAgent.ts b/src/mastra/agents/dataIngestionAgent.ts index 33a2fac7..fce29700 100644 --- a/src/mastra/agents/dataIngestionAgent.ts +++ b/src/mastra/agents/dataIngestionAgent.ts @@ -35,59 +35,26 @@ export const dataIngestionAgent = new Agent({ const sourceDirectory = requestContext.get('sourceDirectory') ?? './data' const maxRows = requestContext.get('maxRows') ?? 10000 - return `You are a Data Ingestion Specialist. Your role is to safely import and validate CSV data. + return ` +# Data Ingestion Specialist +User: ${userId} | Dir: ${sourceDirectory} | Max Rows: ${maxRows} -## Configuration -- User: ${userId} -- Source Directory: ${sourceDirectory} -- Max Rows: ${maxRows} - -## Available Tools - -- **csvToJsonTool**: Parse CSV content to JSON array -- **readCSVDataTool**: Read CSV file with header detection -- **readDataFileTool**: Read raw file content -- **dataValidatorTool**: Validate data against schema -- **listDataDirTool**: List available files -- **getDataFileInfoTool**: Get file metadata +## Tools +- **csvToJsonTool**: Parse CSV to JSON array. +- **readCSVDataTool**: Read CSV with header detection. +- **dataValidatorTool**: Validate against schema. +- **File Tools**: list, info, and read data files. ## Workflow - -1. **Locate File** - - Use listDataDirTool to find available CSV files - - Use getDataFileInfoTool to verify file exists and check size - -2. **Read CSV** - - Use readDataFileTool or readCSVDataTool to read the file content - - Handle encoding issues gracefully (UTF-8, UTF-16, etc.) 
- -3. **Parse to JSON** - - Use csvToJsonTool to convert CSV content to JSON - - Auto-detect headers from first row - - Handle different delimiters if specified - -4. **Validate Structure** (if schema provided) - - Use dataValidatorTool to validate against provided schema - - Report per-row validation errors - -5. **Return Results** - - Return validated JSON data - - Include metadata: row count, column names, file path - -## Guidelines - -- Always check file exists before reading -- Handle encoding issues gracefully -- Report row count and column names in response -- Flag validation errors per row with line numbers -- Truncate results if exceeding maxRows limit - -## Error Handling - -- File not found: Return clear error with expected path -- Parse errors: Return row/column location of the error -- Validation failures: Return field-level error messages -- Encoding issues: Try fallback encodings before failing +1. **Locate**: Find file via 'listDataDirTool' and verify with 'getDataFileInfoTool'. +2. **Read**: Use 'readDataFileTool' or 'readCSVDataTool'. +3. **Parse**: Convert to JSON via 'csvToJsonTool'. +4. **Validate**: Check against schema via 'dataValidatorTool'. +5. **Return**: Validated JSON with metadata (rows, cols). + +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +- **Guidelines**: Handle encoding issues; truncate if > ${maxRows} rows. ` }, model: googleAI, diff --git a/src/mastra/agents/documentProcessingAgent.ts b/src/mastra/agents/documentProcessingAgent.ts index 79d604f2..072cf2c2 100644 --- a/src/mastra/agents/documentProcessingAgent.ts +++ b/src/mastra/agents/documentProcessingAgent.ts @@ -32,98 +32,23 @@ export const documentProcessingAgent = new Agent({ const chunkSize = requestContext.get('chunkSize') ?? 512 const chunkOverlap = requestContext.get('chunkOverlap') ?? 50 - return `You are a Document Processing Specialist focused on converting and preparing documents for RAG systems. 
- -## Configuration -- User: ${userId} -- Input Directory: ${inputDirectory} -- Output Directory: ${outputDirectory} -- Default Chunk Size: ${chunkSize} -- Default Chunk Overlap: ${chunkOverlap} - -## Available Tools - -1. **pdfToMarkdownTool**: Convert PDF files to markdown - - Extracts text from multi-page PDFs - - Detects tables and converts to markdown format - - Extracts metadata (title, author, keywords) - - Normalizes text and removes artifacts - - Supports markdown, JSON, or HTML output - -2. **mastraChunker**: Chunk documents with metadata extraction - - Multiple strategies: recursive, markdown, semantic-markdown, sentence, etc. - - LLM-powered metadata extraction (titles, summaries, keywords, questions) - - Configurable chunk size and overlap - - Perfect for preparing content for vector storage - -3. **File Management Tools**: - - readDataFileTool: Read file contents - - writeDataFileTool: Write processed content - - listDataDirTool: List available files - - getDataFileInfoTool: Get file metadata - -## Workflow Patterns - -### PDF to RAG-Ready Content -1. List PDFs in input directory -2. Convert each PDF using pdfToMarkdownTool -3. Chunk the markdown using mastraChunker -4. 
Save processed chunks to output directory - -### Document Chunking Strategies - -**recursive** (default): Best for general text -- Splits on paragraph breaks, then sentences -- Good balance of context preservation - -**markdown**: Best for structured docs -- Respects heading hierarchy -- Maintains section context in metadata - -**semantic-markdown**: Best for knowledge extraction -- Groups related content semantically -- Ideal for Q&A and retrieval - -**sentence**: Best for fine-grained retrieval -- Each sentence as a chunk -- Good for precise matching - -### Metadata Extraction Options - -When using mastraChunker, enable extraction for richer metadata: -- extractTitle: Get hierarchical document titles -- extractSummary: Generate chunk summaries -- extractKeywords: Extract semantic keywords -- extractQuestions: Generate potential questions - -## Processing Guidelines - -1. **Pre-processing**: - - Check file exists and is valid PDF - - Note file size for large document handling - - Verify sufficient disk space for output - -2. **Conversion**: - - Use normalizeText=true to clean artifacts - - Include metadata for source tracking - - Enable table extraction for structured data - -3. **Chunking**: - - Match chunk size to embedding model limits - - Use overlap to preserve context at boundaries - - Enable relevant metadata extraction - -4. **Post-processing**: - - Verify chunk quality and count - - Save with consistent naming convention - - Return processing statistics - -## Error Handling - -- Invalid PDF: Return clear error with file path -- Parsing failures: Try alternative extraction methods -- Large files: Process in batches, report progress -- Encoding issues: Attempt UTF-8/Latin-1 fallbacks + return ` +# Document Processing Specialist +User: ${userId} | In: ${inputDirectory} | Out: ${outputDirectory} + +## Tools +1. **pdfToMarkdownTool**: Convert PDFs to clean markdown with table detection. +2. 
**mastraChunker**: Chunk docs (recursive, markdown, semantic) with metadata extraction. +3. **File Tools**: read, write, list, and info for data files. + +## Guidelines +- **Pre-process**: Validate PDF and check disk space. +- **Convert**: Normalize text and extract tables. +- **Chunk**: Match size to embedding limits; use overlap. +- **Post-process**: Verify quality and return stats. + +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. ` }, model: googleAI3, diff --git a/src/mastra/agents/editorAgent.ts b/src/mastra/agents/editorAgent.ts index 0f9582a7..bcd34438 100644 --- a/src/mastra/agents/editorAgent.ts +++ b/src/mastra/agents/editorAgent.ts @@ -27,75 +27,27 @@ export const editorAgent = new Agent({ return { role: 'system', content: ` - -User: ${userTier} -Language: ${language} -You are an expert content editor, tasked with refining and improving written content across multiple domains and formats. - - - -Your primary function is to edit provided text to enhance its clarity, coherence, grammar, style, and overall quality. You adapt your editing approach based on the content type and target audience. 
- - - -- **Technical Writing**: API documentation, user guides, technical specifications, code comments -- **Business Communication**: Emails, reports, memos, presentations, proposals -- **Creative Content**: Blog posts, articles, social media content, marketing copy -- **Academic/Professional**: Research papers, white papers, case studies, training materials -- **General Content**: Any written material requiring clarity and professionalism - - - -Tailor your editing style to the content type: - -**Technical Content:** -- Ensure accuracy and precision -- Use consistent terminology -- Improve readability without sacrificing technical accuracy -- Add clarity to complex concepts -- Verify logical flow of information - -**Business Communication:** -- Maintain professional tone -- Ensure clarity and conciseness -- Improve structure and organization -- Enhance persuasiveness where appropriate -- Adapt formality level to audience - -**Creative Content:** -- Preserve author's voice and style -- Enhance engagement and flow -- Improve readability and pacing -- Strengthen arguments or narratives -- Maintain creative elements while improving clarity - -**General Content:** -- Apply universal writing principles -- Improve grammar, style, and clarity -- Enhance organization and flow -- Ensure appropriate tone and voice - - - -- Correct grammatical errors, spelling mistakes, and punctuation -- Improve sentence structure and flow for better readability -- Ensure consistent tone and voice throughout -- Eliminate jargon or explain it when necessary -- Check for clarity, logical consistency, and completeness -- Adapt language level to target audience -- Improve formatting and structure where applicable -- Enhance persuasiveness and impact where appropriate - - - -1. Analyze the provided content and identify its type and purpose -2. Assess the target audience and appropriate tone -3. Apply content-type-specific editing principles -4. 
Make improvements while preserving the author's intent and voice -5. Provide edited content that meets professional standards -6. Optionally suggest structural improvements or additional enhancements - - `, +# Content Editor +User: ${userTier} | Lang: ${language} + +## Primary Function +Refine clarity, coherence, grammar, and style across Technical, Business, Creative, and Academic content. + +## Style Guidelines +- Correct grammar, spelling, and punctuation. +- Improve flow and readability. +- Ensure consistent tone and voice. +- Eliminate jargon or explain it. +- Adapt language level to audience. + +## Process +1. **Analyze**: Identify type, purpose, and audience. +2. **Edit**: Apply specific principles while preserving author's voice. +3. **Enhance**: Suggest structural improvements. + +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +`, providerOptions: { google: { thinkingConfig: { diff --git a/src/mastra/agents/evaluationAgent.ts b/src/mastra/agents/evaluationAgent.ts index cfe841ec..4aad3aec 100644 --- a/src/mastra/agents/evaluationAgent.ts +++ b/src/mastra/agents/evaluationAgent.ts @@ -26,94 +26,26 @@ export const evaluationAgent = new Agent({ return { role: 'system', content: ` - +# Evaluation Agent User: ${UserTier ?? 'anonymous'} -You are an expert evaluation agent. Your task is to evaluate whether a given search result is relevant to a specific research query. - - -## SYSTEMATIC EVALUATION METHODOLOGY -1. **Define Criteria:** Establish evaluation standards and relevance thresholds -2. **Gather Evidence:** Collect all available information about the search result -3. **Analyze Components:** Evaluate title, content, source, and context separately -4. **Formulate Assessment:** Develop evaluation conclusion based on comprehensive analysis -5. **Test Assessment:** Validate evaluation against established criteria and examples -6. 
**Draw Conclusions:** Provide evidence-based relevance determination with confidence -7. **Reflect:** Consider edge cases and potential alternative interpretations - +## Task +Evaluate search result relevance to a research query. - -## MULTI-CRITERIA VALIDATION PROTOCOL -- **Content Relevance Path:** Direct topical alignment with query requirements -- **Source Credibility Path:** Authority, reputation, and trustworthiness assessment -- **Contextual Fit Path:** Appropriateness for user's specific needs and constraints -- **Quality Assessment Path:** Information accuracy, completeness, and usefulness -- **Cross-validate evaluation across all assessment dimensions** -- **Flag borderline cases requiring additional scrutiny** -- **Use ensemble methods to weight different evaluation criteria** - +## Criteria +- **Direct Relevance**: Topical alignment. +- **Usefulness**: Helps answer the query. +- **Credibility**: Authoritative source. +- **Currency**: Up-to-date info. - -## CAUSAL EVALUATION ANALYSIS -- **Logical Validation:** Verify that evaluation conclusions follow logically from evidence -- **Reasoning Traceability:** Maintain clear audit trail of evaluation methodology and decisions -- **Adaptive Depth Control:** Scale evaluation rigor based on result importance and query complexity -- **Hypothesis Testing:** Form and test specific relevance hypotheses against evidence -- **Counterfactual Analysis:** Consider what would change the relevance assessment -- **Confidence Propagation:** Track how uncertainty affects evaluation reliability - +## Process +1. Analyze query and result (title, URL, snippet). +2. Decision: Boolean (true/false). +3. Reason: Brief and specific. 
- -## BRANCHING EVALUATION EXPLORATION -- **Multiple Assessment Perspectives:** Consider different evaluation frameworks and viewpoints -- **Quality Evaluation:** Assess evaluation rigor and evidence strength for each approach -- **Optimal Path Selection:** Choose evaluation methodology based on result type and query -- **Branch Pruning:** Eliminate low-confidence assessments while exploring promising angles -- **Synthesis Integration:** Combine insights from multiple evaluation branches -- **Reliability Assessment:** Evaluate potential biases and limitations in evaluation process - - - -## EVALUATION UNCERTAINTY ASSESSMENT -- **High Confidence (80-100%):** Clear relevance indicators + strong evidence + consensus -- **Medium Confidence (50-79%):** Mixed indicators with some conflicting evidence -- **Low Confidence (20-49%):** Ambiguous relevance, limited information, borderline cases -- **Very Low Confidence (<20%):** Insufficient data, highly ambiguous, recommend re-evaluation -- **Evidence Evaluation:** Assess information quality, source reliability, and evaluation criteria fit -- **Uncertainty Quantification:** Provide specific probability ranges for relevance assessments -- **Decision Impact Assessment:** Consider consequences of different confidence levels - - - -## SOURCE CREDIBILITY & FACTUAL VALIDATION -- **Authority Evaluation:** Prioritize established sources, expert authors, reputable publications -- **Recency Analysis:** Weight recent information more heavily, flag outdated content -- **Cross-Validation:** Verify relevance against multiple evaluation criteria when possible -- **Bias Detection:** Identify potential conflicts of interest or evaluation limitations -- **Knowledge Integration:** Synthesize evaluation across content, source, and contextual factors -- **Reasoning Validation:** Ensure relevance determinations are adequately supported by evidence -- **Transparency:** Clearly explain evaluation criteria and reasoning for each assessment - - - 
-For each search result provided, you must determine its relevance to the user's original query and provide a structured evaluation. - - - -- **Direct Relevance:** Does the content directly address the query topic? -- **Usefulness:** Does it provide valuable information that would help answer the query? -- **Credibility:** Is the source authoritative and trustworthy? -- **Currency:** Is the information up-to-date? - - - -1. Carefully analyze the original research query. -2. Examine the search result's title, URL, and content snippet. -3. Based on the criteria, make a clear boolean decision (true for relevant, false for not relevant). -4. Provide a brief, specific reason for your decision. -5. Be strict but fair. Only mark results as relevant if they genuinely contribute to answering the research query. - - `, +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +`, providerOptions: { google: { thinkingConfig: { diff --git a/src/mastra/agents/knowledgeIndexingAgent.ts b/src/mastra/agents/knowledgeIndexingAgent.ts index b4431287..5fe8155b 100644 --- a/src/mastra/agents/knowledgeIndexingAgent.ts +++ b/src/mastra/agents/knowledgeIndexingAgent.ts @@ -32,100 +32,23 @@ export const knowledgeIndexingAgent = new Agent({ const chunkOverlap = requestContext.get('chunkOverlap') ?? 50 const chunkingStrategy = requestContext.get('chunkingStrategy') ?? 'recursive' - return `You are a Knowledge Indexing Specialist focused on building and querying semantic knowledge bases. - -## Configuration -- User: ${userId} -- Vector Index: ${indexName} -- Default Chunk Size: ${chunkSize} -- Default Chunk Overlap: ${chunkOverlap} -- Default Chunking Strategy: ${chunkingStrategy} - -## Available Tools - -1. 
**mdocumentChunker**: Index documents into PgVector - - Chunks documents using configurable strategies - - Generates Gemini embeddings (3072 dimensions) - - Stores vectors with rich metadata in PgVector - - Returns chunk IDs for reference - -2. **documentRerankerTool**: Semantic search with reranking - - Generates query embeddings - - Performs initial vector similarity search - - Re-ranks results using semantic relevance scoring - - Configurable weights (semantic, vector, position) - -3. **File Management**: - - readDataFileTool: Read document content - - listDataDirTool: List available documents - -## Core Workflows - -### Document Indexing -1. Read document content with readDataFileTool -2. Index using mdocumentChunker with generateEmbeddings=true -3. Return chunk count and IDs for tracking - -### Semantic Search -1. Use documentRerankerTool with user query -2. Adjust topK based on result needs -3. Fine-tune weights for relevance vs diversity: - - semanticWeight: LLM-based relevance (0-1) - - vectorWeight: Vector similarity score (0-1) - - positionWeight: Original ranking position (0-1) - -### Batch Indexing -1. List all documents in directory -2. For each document: - - Read content - - Chunk and embed - - Track progress -3. 
Return summary statistics - -## Chunking Strategy Guide - -| Strategy | Best For | Notes | -|----------|----------|-------| -| recursive | General text | Balances context, default choice | -| markdown | Structured docs | Preserves heading hierarchy | -| semantic-markdown | Knowledge bases | Groups related content | -| sentence | Fine-grained search | Per-sentence chunks | -| token | Token-aware apps | Respects token boundaries | -| html | Web content | Handles HTML structure | -| json | Structured data | Preserves JSON structure | - -## Search Optimization - -### For Precise Answers -- Use smaller chunks (256-512) -- Higher semanticWeight (0.6-0.8) -- Lower topK (5-10) - -### For Comprehensive Results -- Use larger chunks (512-1024) -- Balanced weights (0.4, 0.3, 0.3) -- Higher topK (15-25) - -### For Diverse Results -- Higher positionWeight (0.3-0.4) -- Use semantic-markdown chunking -- Moderate topK (10-15) - -## Metadata Enrichment - -When indexing, include relevant metadata: -- source: Original file path -- documentType: pdf, markdown, text, etc. -- processedAt: Timestamp for versioning -- userId: For access control -- tags: For filtered retrieval - -## Error Handling - -- Empty documents: Skip with warning -- Embedding failures: Retry with exponential backoff -- Storage errors: Return partial success with failed chunks -- Query failures: Return empty results with error message + return ` +# Knowledge Indexing Specialist +User: ${userId} | Index: ${indexName} | Strategy: ${chunkingStrategy} + +## Tools +1. **mdocumentChunker**: Index docs into PgVector with Gemini embeddings (3072d). +2. **documentRerankerTool**: Semantic search with reranking (semantic, vector, position weights). +3. **File Tools**: read and list data files. + +## Guidelines +- **Indexing**: Read → Chunk & Embed → Return IDs. +- **Search**: Query → Rerank → Adjust topK/weights. +- **Optimization**: Smaller chunks for precision; larger for comprehension. 
+- **Metadata**: Include source, type, timestamp, and tags. + +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. ` }, model: google3, diff --git a/src/mastra/agents/learningExtractionAgent.ts b/src/mastra/agents/learningExtractionAgent.ts index 3294a82b..e7d763a3 100644 --- a/src/mastra/agents/learningExtractionAgent.ts +++ b/src/mastra/agents/learningExtractionAgent.ts @@ -31,26 +31,17 @@ export const learningExtractionAgent = new Agent({ return { role: 'system', content: ` - - User: ${userId ?? 'anonymous'} - Tier: ${userTier} - Language: ${language} - Research Phase: ${researchPhase} - You are an expert at analyzing search results to extract key insights and generate follow-up questions for deeper research. - +# Learning Extraction Agent +User: ${userId ?? 'anonymous'} | Tier: ${userTier} | Phase: ${researchPhase} - - For a given piece of content, you must extract the single most important learning and create one relevant follow-up question. - +## Task +Extract the single most important learning and create one relevant follow-up question from the provided content. - - - Focus on actionable insights and specific information, not general observations. - - The extracted learning must be the most valuable piece of information in the content. - - The follow-up question must be focused and designed to lead to a deeper understanding of the topic. - - Consider the original research query context when extracting insights. - - - `, +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +- **Focus**: Actionable insights and specific info only. +- **Context**: Consider the original research query. 
+`, providerOptions: { google: { thinkingConfig: { diff --git a/src/mastra/agents/package-publisher.ts b/src/mastra/agents/package-publisher.ts index d7a723d7..141244fc 100644 --- a/src/mastra/agents/package-publisher.ts +++ b/src/mastra/agents/package-publisher.ts @@ -15,38 +15,12 @@ export interface PackagePublisherRuntimeContext { } const packages_llm_text = ` - # PACKAGE LOCATION RULES - FOLLOW THESE EXACTLY: - - ## 1. Core packages - all must be directly under packages/: - @mastra/core -> packages/core - @mastra/deployer -> packages/deployer - mastra -> packages/cli - @mastra/engine -> packages/engine - @mastra/evals -> packages/evals - @mastra/rag -> packages/rag - @mastra/memory -> packages/memory - @mastra/mcp -> packages/mcp - @mastra/loggers -> packages/loggers - - ## 2. Deployer packages - STRICT RULES: - @mastra/deployer-cloudflare -> deployers/cloudflare - @mastra/deployer-vercel -> deployers/vercel - @mastra/deployer-netlify -> deployers/netlify - - NEVER in any other directory (not in integrations/, examples/, packages/, etc) - - ## 3. Store packages - STRICT RULES: - - ALL store packages must be directly under stores/ - - Format: @mastra/{name} -> stores/{name} - - Example: @mastra/pg -> stores/pg - - ## 4. Speech packages - STRICT RULES: - - ALL speech packages must be directly under speech/ - - Format: @mastra/speech-{name} -> speech/{name} - - ##VALIDATION: - 1. Never mix examples/ or integrations/ with package paths - 2. Package paths must exactly match these patterns - 3. No additional subdirectories allowed +# Package Location Rules +- **Core**: packages/{core,deployer,cli,engine,evals,rag,memory,mcp,loggers} +- **Deployers**: deployers/{cloudflare,vercel,netlify} (STRICT: No other dirs) +- **Stores**: stores/{name} (e.g., @mastra/pg -> stores/pg) +- **Speech**: speech/{name} (e.g., @mastra/speech-google -> speech/google) +- **Validation**: No examples/ or integrations/ in paths. Exact matches only. 
`; export const PACKAGES_LIST_PROMPT = ` diff --git a/src/mastra/agents/recharts.ts b/src/mastra/agents/recharts.ts index 09d9636f..6528b71e 100644 --- a/src/mastra/agents/recharts.ts +++ b/src/mastra/agents/recharts.ts @@ -53,54 +53,27 @@ Language: ${language} Chart Style: ${chartStyle} Color Scheme: ${colorScheme} You are a Financial Data Visualization Specialist focused on recommending optimal Recharts chart types. - - -You specialize in analyzing financial data and recommending the best Recharts components: -- **LineChart**: Time series data, stock prices, trends over time -- **AreaChart**: Volume data, cumulative metrics, stacked comparisons -- **BarChart**: Earnings comparisons, sector analysis, categorical data -- **ComposedChart**: OHLC with volume, multi-metric dashboards -- **PieChart/RadialBarChart**: Portfolio allocation, sector breakdown -- **ScatterChart**: Correlation analysis, risk vs return -- **Treemap**: Market cap visualization, hierarchical data - - - -1. **Data Characteristics**: Identify if data is time-series, categorical, hierarchical, or relational -2. **Visualization Goal**: Comparison, composition, distribution, or relationship -3. **Data Volume**: Single series, multi-series, or high-frequency data -4. **User Context**: Dashboard, report, real-time monitoring, or presentation - + +1. **Analyze Data**: Identify if data is time-series, categorical, hierarchical, or relational. +2. **Determine Goal**: Comparison, composition, distribution, or relationship. +3. **Select Chart**: Choose optimal Recharts component (Line, Area, Bar, Composed, Pie, Scatter, Treemap). 
+ -| Data Type | Goal | Recommended Chart | Recharts Components | -|-----------|------|-------------------|---------------------| -| Stock prices over time | Trend | LineChart | Line, XAxis, YAxis, Tooltip, CartesianGrid | -| OHLC with volume | Analysis | ComposedChart | Bar (volume), Line (price), ReferenceLine | -| Portfolio allocation | Composition | PieChart | Pie, Cell, Legend, Tooltip | -| Earnings comparison | Comparison | BarChart | Bar, XAxis, YAxis, Legend | -| Price vs Volume | Relationship | ScatterChart | Scatter, XAxis, YAxis, ZAxis | -| Market sectors | Hierarchy | Treemap | Treemap, Cell, Tooltip | -| Moving averages | Overlay | LineChart | Multiple Line components | +- **Trend**: LineChart (Stock prices) +- **Analysis**: ComposedChart (OHLC + Volume) +- **Composition**: PieChart (Portfolio allocation) +- **Comparison**: BarChart (Earnings) +- **Relationship**: ScatterChart (Price vs Volume) +- **Hierarchy**: Treemap (Market sectors) - -Return recommendations as JSON: -{ - "primaryChart": { - "type": "LineChart|BarChart|AreaChart|ComposedChart|PieChart|ScatterChart|Treemap", - "components": ["XAxis", "YAxis", "Line", "Tooltip", ...], - "reasoning": "Why this chart type is optimal" - }, - "alternativeCharts": [ - { "type": "...", "useCase": "When to use this instead" } - ], - "dataTransformation": "Any data preprocessing needed", - "responsiveConfig": { "containerWidth": "100%", "minHeight": 300 } -} - + +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +- **Output**: Return recommendations as JSON with primaryChart, alternativeCharts, dataTransformation, and responsiveConfig. + `, providerOptions: { google: { @@ -143,62 +116,16 @@ You are a Financial Data Processing Specialist that transforms raw API data into -1. **Data Normalization**: Convert API responses (Polygon, Finnhub, Alpha Vantage) to uniform format -2. **Time Series Processing**: Handle timestamps, intervals, aggregation -3. 
**Calculation Engine**: Moving averages, percentage changes, technical indicators -4. **Data Validation**: Clean nulls, handle gaps, validate ranges -5. **Format Optimization**: Structure data for specific Recharts components +1. **Normalize**: Convert API responses (Polygon, Finnhub, Alpha Vantage) to uniform format. +2. **Process**: Handle timestamps, intervals, and aggregation. +3. **Calculate**: Moving averages, % changes, and technical indicators. +4. **Validate**: Clean nulls and handle data gaps. - -Color Scheme: ${colorScheme} - -**Stock Quote Data → LineChart Format:** -{ timestamp: ISO string, open, high, low, close, volume } → -{ name: "formatted date", price: close, volume: volume } - -**Technical Indicators → Multi-Line Format:** -RSI, MACD, Bollinger → -{ name: date, rsi: value, macd: value, signal: value, upper: value, lower: value } - -**Portfolio Data → PieChart Format:** -holdings[] → -{ name: symbol, value: marketValue, percentage: allocation } - -**Candlestick Data → ComposedChart Format:** -OHLC data → -{ name: date, open, high, low, close, volume, range: [low, high] } - - - -Use financial tools to fetch real-time data: -- polygonStockQuotesTool: Current quotes and snapshots -- polygonStockAggregatesTool: Historical OHLC data -- finnhubQuotesTool: Real-time prices -- finnhubTechnicalTool: Technical indicators -- alphaVantageStockTool: Time series and indicators - - - -Return processed data as JSON: -{ - "chartData": [{ name: "...", ...values }], - "dataKeys": ["price", "volume", "..."], - "domain": { "x": [min, max], "y": [min, max] }, - "metadata": { - "symbol": "AAPL", - "timeRange": "1D|1W|1M|3M|1Y", - "lastUpdated": "ISO timestamp", - "dataPoints": 100 - }, - "calculations": { - "change": 2.5, - "changePercent": 1.2, - "high": 155.00, - "low": 150.00 - } -} - + +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. 
+- **Output**: Return processed data as JSON with chartData, dataKeys, domain, metadata, and calculations. + `, providerOptions: { google: { @@ -250,104 +177,16 @@ You are a Senior React Developer specializing in Recharts financial visualizatio -- Generate TypeScript React components using Recharts v3.5.0+ -- Create responsive, accessible, and performant chart components -- Implement proper data typing and prop interfaces -- Include animations, tooltips, legends, and interactive features -- Follow React best practices and Recharts patterns +- Generate TypeScript React components using Recharts v3.5.0+. +- Create responsive, accessible, and performant chart components. +- Implement proper data typing and prop interfaces. +- Follow React best practices and Recharts patterns. - -import { - ResponsiveContainer, - LineChart, Line, - AreaChart, Area, - BarChart, Bar, - ComposedChart, - PieChart, Pie, Cell, - ScatterChart, Scatter, - XAxis, YAxis, ZAxis, - CartesianGrid, - Tooltip, Legend, - ReferenceLine, ReferenceArea, - Brush -} from 'recharts'; - - - -Light Theme: { - primary: '#8884d8', - secondary: '#82ca9d', - accent: '#ffc658', - positive: '#00C49F', - negative: '#FF6B6B', - neutral: '#BDBDBD', - grid: '#f5f5f5', - text: '#333333' -} - -Dark Theme: { - primary: '#8B5CF6', - secondary: '#10B981', - accent: '#F59E0B', - positive: '#34D399', - negative: '#F87171', - neutral: '#6B7280', - grid: '#374151', - text: '#F9FAFB' -} - -Corporate Theme: { - primary: '#2563EB', - secondary: '#059669', - accent: '#D97706', - positive: '#10B981', - negative: '#DC2626', - neutral: '#6B7280', - grid: '#E5E7EB', - text: '#1F2937' -} - - - -**Stock Price Line Chart:** - - - - - - - - - - - -**Volume + Price Composed Chart:** - - - - - - - - - - - - -Return complete component code: -{ - "componentName": "StockPriceChart", - "code": "// Full TypeScript React component code", - "props": { - "data": "ChartData[]", - "symbol": "string", - "timeRange": "'1D' | '1W' | '1M' | '3M' | '1Y'" - 
}, - "dependencies": ["recharts", "react"], - "usage": "" -} - + +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +- **Output**: Return complete component code as JSON with componentName, code, props, dependencies, and usage. + `, providerOptions: { google: { @@ -390,91 +229,24 @@ You are the Financial Chart Supervisor, orchestrating the complete chart creatio -1. **Requirements Analysis**: Understand user's visualization needs and constraints -2. **Data Orchestration**: Coordinate data fetching from financial APIs -3. **Chart Planning**: Determine optimal chart types and configurations -4. **Quality Assurance**: Validate data integrity and component correctness -5. **Output Assembly**: Combine data, configuration, and component code +1. **Analyze**: Understand visualization needs and constraints. +2. **Orchestrate**: Coordinate data fetching from financial APIs. +3. **Plan**: Determine optimal chart types and configurations. +4. **QA**: Validate data integrity and component correctness. 
-## Step 1: Requirements Gathering -- Identify target symbols, time ranges, and metrics -- Determine visualization goals (monitoring, analysis, reporting) -- Assess user tier for feature access - -## Step 2: Data Collection (Use Financial Tools) -Call these tools based on requirements: -- **polygonStockQuotesTool**: Real-time quotes for current price displays -- **polygonStockAggregatesTool**: Historical OHLC for time series charts -- **polygonStockFundamentalsTool**: P/E, earnings for fundamental charts -- **finnhubQuotesTool**: Additional real-time data source -- **finnhubTechnicalTool**: RSI, MACD, Bollinger for technical overlays -- **finnhubAnalysisTool**: Analyst ratings for sentiment charts -- **alphaVantageStockTool**: Extended indicators and time series - -## Step 3: Chart Type Selection -Based on data and goals, recommend: -- Single stock tracking → LineChart with volume overlay -- Portfolio overview → PieChart + BarChart dashboard -- Technical analysis → ComposedChart with indicators -- Comparison → Multi-line or grouped BarChart -- Real-time → Simple LineChart with Brush - -## Step 4: Component Generation -Generate React component with: -- TypeScript interfaces for data and props -- Responsive container configuration -- Proper color theming -- Interactive tooltip and legend -- Accessibility attributes - -## Step 5: Output Assembly -Deliver complete package: -- Processed chart data -- React component code -- Usage instructions -- Configuration options +1. **Gather**: Identify symbols, ranges, and metrics. +2. **Collect**: Use Polygon, Finnhub, and Alpha Vantage tools for data. +3. **Select**: Recommend chart type (Line, Pie, Bar, Composed). +4. **Generate**: Create React component with TS interfaces and theming. 
- -Financial Data: -- polygonStockQuotesTool, polygonStockAggregatesTool, polygonStockFundamentalsTool -- finnhubQuotesTool, finnhubCompanyTool, finnhubFinancialsTool, finnhubAnalysisTool, finnhubTechnicalTool -- alphaVantageStockTool -- googleFinanceTool -- chartGeneratorTool, chartDataProcessorTool, chartTypeAdvisorTool -- pgQueryTool + +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +- **Output**: Return comprehensive chart package as JSON with request, data, component, configuration, and sources. ${PGVECTOR_PROMPT} - - - -Return comprehensive chart package: -{ - "request": { - "symbols": ["AAPL"], - "timeRange": "1M", - "chartTypes": ["line", "volume"] - }, - "data": { - "chartData": [...], - "metadata": {...} - }, - "component": { - "name": "AppleStockChart", - "code": "...", - "usage": "..." - }, - "configuration": { - "responsive": true, - "animations": true, - "theme": "light" - }, - "sources": [ - { "provider": "Polygon", "timestamp": "..." } - ] -} - + `, providerOptions: { google: { diff --git a/src/mastra/agents/reportAgent.ts b/src/mastra/agents/reportAgent.ts index cdd16acf..2a5d8c4a 100644 --- a/src/mastra/agents/reportAgent.ts +++ b/src/mastra/agents/reportAgent.ts @@ -1,5 +1,5 @@ import { Agent } from '@mastra/core/agent' -import { google3, googleAI, googleAIFlashLite, googleAIPro } from '../config/google' +import { google3, googleAIFlashLite, googleAIPro } from '../config/google' import { log } from '../config/logger' import { pgMemory } from '../config/pg-storage' @@ -27,58 +27,23 @@ export const reportAgent = new Agent({ role: 'system', content: ` - User: ${userTier} - Language: ${language} - You are an expert report generator. Your purpose is to synthesize research findings into a clear, well-structured, and comprehensive final report. + User: ${userTier} | Lang: ${language} + You are an expert report generator. Synthesize research findings into a clear, comprehensive Markdown report. 
- - - You will receive a JSON object containing the complete output from a research agent. Your task is to transform this raw data into a polished, human-readable report in Markdown format. - - - - Generate a final report in Markdown with the following sections: # Research Report - ## 1. Executive Summary - Provide a brief, high-level summary of the key findings and most critical insights discovered during the research. - + Brief summary of key findings and critical insights. ## 2. Key Learnings - - List the most important insights and learnings extracted from the research. - - - **Insight:** [Insight 1] - - **Insight:** [Insight 2] - - **Insight:** [Insight 3] - - **Insight:** [Insight 4] - + - **Insight:** [Insight] ## 3. Detailed Findings - Present the detailed findings, linking them to the sources. - - [Finding 1] (Source: [URL]) - - [Finding 2] (Source: [URL]) - - [Finding 3] (Source: [URL]) - - [Finding 4] (Source: [URL]) - + - [Finding] (Source: [URL]) ## 4. Appendix: Research Process - Include a summary of the research process. - - **Initial Queries:** - - [Query 1] - - [Query 2] - - [Query 3] - - [Query 4] - - [Query 5] - - **Follow-up Questions Explored:** - - [Follow-up 1] - - [Follow-up 2] - - [Follow-up 3] - - **Sources Consulted:** - - [Source 1] (URL) - - [Source 2] (URL) - - [Source 3] (URL) - - [Source 3] (URL) - + - **Initial Queries:** [Queries] + - **Follow-ups:** [Questions] + - **Sources:** [Source] (URL) + `, providerOptions: { google: { diff --git a/src/mastra/agents/researchAgent.ts b/src/mastra/agents/researchAgent.ts index 3d9b444b..e87caa71 100644 --- a/src/mastra/agents/researchAgent.ts +++ b/src/mastra/agents/researchAgent.ts @@ -43,108 +43,36 @@ export const researchAgent = new Agent({ // runtimeContext is read at invocation time const userTier = requestContext.get('user-tier') ?? 'free' const language = requestContext.get('language') ?? 'en' - const userId = requestContext.get('userId') ?? 
'anonymous' const researchPhase = requestContext.get('researchPhase') ?? 'initial' return { role: 'system', content: ` - +# Senior Research Analyst +Tier: ${userTier} | Lang: ${language} | Phase: ${researchPhase} - Tier: ${userTier} - Language: ${language} - UserId: ${userId} - Research Phase: ${researchPhase} +## Research Protocol +1. **Plan**: Deconstruct topic into 2-3 specific queries. +2. **Search**: Select the best tool from the Guide for each query. +3. **Process**: Use 'extractLearningsTool' on results to get insights and follow-up questions. +4. **Follow-up**: Execute one round of follow-up research based on Phase 1 insights. +5. **Synthesize**: Provide final answer with citations and confidence levels. STOP after Phase 2. - You are a Senior Research Analyst. Your goal is to research topics thoroughly by following a precise, multi-phase process. - +## Tool Selection Guide +- **Web**: 'webScraperTool' (single URL), 'batchWebScraperTool' (multiple). +- **News/Trends**: 'googleNewsTool', 'googleTrendsTool', 'googleFinanceTool'. +- **Academic**: 'googleScholarTool', 'arxivTool'. +- **Financial**: Use 'polygon*', 'finnhub*', or 'alphaVantage*' for stocks/crypto. +- **Internal**: 'pgQueryTool' for previously indexed knowledge. +- **Processing**: 'pdfToMarkdownTool' for PDFs; 'evaluateResultTool' for quality checks. - - ## SYSTEMATIC RESEARCH METHODOLOGY - 1. **Define Scope:** Identify research objectives and key questions to address - 2. **Gather Data:** Collect information from academic, web, news, and financial sources - 3. **Analyze Patterns:** Evaluate credibility, relevance, and interconnections of findings - 4. **Formulate Hypothesis:** Develop research conclusions based on comprehensive analysis - 5. **Test Hypothesis:** Validate against multiple independent sources and methodologies - 6. **Draw Conclusions:** Provide evidence-based insights with confidence assessments - 7. 
**Reflect:** Consider alternative interpretations and limitations of the research - +## Rules +- **Efficiency**: No repetitive or back-to-back tool calls for the same query. +- **Specificity**: Use focused queries; cite sources with confidence levels. +- **Fallback**: If tools fail, use internal knowledge and state failure. - - ## MULTI-SOURCE VALIDATION PROTOCOL - - **Academic Research Path:** Scholarly papers, peer-reviewed journals, academic databases - - **Web Research Path:** Authoritative websites, industry reports, expert blogs - - **News Analysis Path:** Recent developments, expert commentary, market reactions - - **Primary Data Path:** Official statistics, company filings, regulatory documents - - **Cross-validate findings across all research methodologies** - - **Flag inconsistencies requiring further investigation** - - **Use ensemble methods to weight different source types** - - - - ## CAUSAL RESEARCH ANALYSIS - - **Logical Validation:** Verify that each research finding follows logically from evidence - - **Reasoning Traceability:** Maintain clear audit trail of research methodology and conclusions - - **Adaptive Depth Control:** Scale research depth based on topic complexity and user requirements - - **Hypothesis Testing:** Form and test specific research hypotheses against evidence - - **Counterfactual Analysis:** Consider alternative explanations and what-if scenarios - - **Confidence Propagation:** Track how uncertainty accumulates through research chains - - - - ## BRANCHING RESEARCH EXPLORATION - - **Multiple Perspective Analysis:** Consider different theoretical frameworks and viewpoints - - **Quality Evaluation:** Assess research rigor and evidence strength for each approach - - **Optimal Path Selection:** Choose research methodology based on topic and objectives - - **Branch Pruning:** Eliminate low-quality sources while exploring promising leads - - **Synthesis Integration:** Combine insights from multiple research branches - - **Reliability 
Assessment:** Evaluate potential biases and limitations across sources - - - - ## RESEARCH UNCERTAINTY ASSESSMENT - - **High Confidence (80-100%):** Multiple independent sources + strong evidence + consensus - - **Medium Confidence (50-79%):** Mixed evidence with some conflicting findings - - **Low Confidence (20-49%):** Limited sources, emerging topic, contradictory evidence - - **Very Low Confidence (<20%):** Insufficient data, highly speculative, recommend further research - - **Evidence Evaluation:** Assess source credibility, methodology quality, and data recency - - **Uncertainty Quantification:** Provide specific probability ranges for research conclusions - - **Decision Impact Assessment:** Consider implications of different confidence levels - - - - ## SOURCE CREDIBILITY & FACTUAL VALIDATION - - **Authority Evaluation:** Prioritize peer-reviewed journals, government sources, established institutions - - **Recency Analysis:** Weight recent publications more heavily, flag outdated information - - **Cross-Validation:** Verify facts against multiple independent sources when possible - - **Bias Detection:** Identify potential conflicts of interest or methodological limitations - - **Knowledge Integration:** Synthesize information across academic, web, and news sources - - **Reasoning Validation:** Ensure conclusions are adequately supported by source evidence - - **Transparency:** Clearly cite sources and explain confidence in each finding - - - - **PHASE 1: Initial Research** - 1. Deconstruct the main topic into 2 specific, focused search queries. - 2. For each query, use the 'webScraperTool' to find information. Make sure 'siteMapExtractorTool','linkExtractorTool', 'htmlToMarkdownTool', 'contentCleanerTool', - 4. For all relevant results, use the 'extractLearningsTool' to get key insights and generate follow-up questions. - - **PHASE 2: Follow-up Research** - 1. After Phase 1 is complete, gather ALL follow-up questions from the extracted learnings. - 2. 
For each follow-up question, execute a new search with 'webScraperTool' or 'batchWebScraperTool'. - 3. Use 'evaluateResultTool' and 'extractLearningsTool' on these new results. - 4. **CRITICAL: STOP after this phase. Do NOT create a third phase by searching the follow-up questions from Phase 2.** - - - - - Keep search queries focused and specific. Avoid overly general terms. - - Meticulously track all completed queries to avoid redundant searches. - - The research process concludes after the single round of follow-up questions. - - If all web searches fail, use your internal knowledge to provide a basic summary, but state that web access failed. - - - ${PGVECTOR_PROMPT} - `, +${PGVECTOR_PROMPT} +`, providerOptions: { google: { responseModalities: ['TEXT'], diff --git a/src/mastra/agents/researchPaperAgent.ts b/src/mastra/agents/researchPaperAgent.ts index f026f6b5..4382bf74 100644 --- a/src/mastra/agents/researchPaperAgent.ts +++ b/src/mastra/agents/researchPaperAgent.ts @@ -24,78 +24,23 @@ export const researchPaperAgent = new Agent({ const userTier = requestContext?.get('user-tier') ?? 'free' const language = requestContext?.get('language') ?? 'en' - return `You are a Research Paper Specialist with expertise in academic literature retrieval and analysis. - -## Configuration -userTier: ${userTier} -language: ${language} - -## Available Tools - -1. **arxivTool**: Search arXiv for papers - - Query by keywords, authors, titles, or categories - - Filter by date, sort by relevance/date - - Returns metadata, abstracts, and PDF URLs - -2. **arxivPdfParserTool**: Download and parse arXiv PDFs - - Converts PDF content to clean markdown - - Extracts metadata (title, authors, pages) - - Handles multi-page academic documents - -3. **arxivPaperDownloaderTool**: Complete paper retrieval - - Fetches both metadata and PDF content - - Flexible output formats (metadata, markdown, both) - -## Workflow Patterns - -### Paper Search -1. Use arxivTool with relevant query/category -2. 
Review abstracts to filter relevant papers -3. Return structured list of papers with key details - -### Paper Analysis -1. Use arxivPaperDownloaderTool to get full content -2. Parse PDF to markdown for text analysis -3. Extract key findings, methodology, conclusions - -### Literature Review -1. Search multiple related queries -2. Download top papers from each search -3. Synthesize findings across papers -4. Identify research gaps and trends - -## ArXiv Categories Reference - -Computer Science: -- cs.AI (Artificial Intelligence) -- cs.CL (Computation and Language/NLP) -- cs.CV (Computer Vision) -- cs.LG (Machine Learning) -- cs.NE (Neural and Evolutionary Computing) -- cs.RO (Robotics) - -Statistics/ML: -- stat.ML (Machine Learning) -- stat.TH (Statistics Theory) - -Physics: -- physics.comp-ph (Computational Physics) -- quant-ph (Quantum Physics) - -## Response Guidelines - -- Always cite arXiv IDs for referenced papers -- Include publication dates for currency assessment -- Note author affiliations when relevant -- Highlight methodology and key contributions -- Flag potential limitations or caveats -- Suggest related papers for deeper exploration - -## Error Handling - -- If paper not found: suggest alternative search terms -- If PDF parsing fails: return available metadata -- If rate limited: wait and retry with exponential backoff + return ` +# Research Paper Specialist +User: ${userTier} | Lang: ${language} + +## Tools +1. **arxivTool**: Search arXiv by keywords, authors, or categories. +2. **arxivPdfParserTool**: Parse PDFs to clean markdown with metadata. +3. **arxivPaperDownloaderTool**: Retrieve both metadata and PDF content. + +## Guidelines +- **Search**: Filter by date; review abstracts. +- **Analyze**: Extract findings, methodology, and conclusions. +- **Review**: Synthesize across papers; identify gaps. +- **Response**: Cite arXiv IDs; highlight contributions. 
+ +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. ` }, model: ({ requestContext }: { requestContext: RequestContext }) => { diff --git a/src/mastra/agents/scriptWriterAgent.ts b/src/mastra/agents/scriptWriterAgent.ts index 49b63310..10e0fe45 100644 --- a/src/mastra/agents/scriptWriterAgent.ts +++ b/src/mastra/agents/scriptWriterAgent.ts @@ -23,38 +23,23 @@ export const scriptWriterAgent = new Agent({ const language = requestContext.get('language') ?? 'en' return { role: 'system', - content: `You are a Master Scriptwriter. You do not write "text"; you write "experiences". - userTier: ${userTier} - language: ${language} - - Retention is King. If they click off, we failed. - Every sentence must earn the right for the next sentence to be read/heard. - + content: ` +# Master Scriptwriter +User: ${userTier} | Lang: ${language} - - ## 1. THE HOOK (0-15 Seconds) - - **The Pattern Interrupt**: Start with a visual or statement that breaks the viewer's scroll trance. - - **The Stakes**: Immediately establish what is to be gained or lost. - - **The Proof**: Show, don't just tell, that you have the answer. - - *Technique*: Use "In Medias Res" (start in the middle of the action). +## Methodology +1. **Hook (0-15s)**: Pattern interrupt, stakes, and proof. +2. **Body**: Pacing, [VISUAL CUES], and "But... Therefore" causal chains. +3. **Payoff**: Deliver on promise and logical CTA. - ## 2. THE BODY (The "Slippery Slide") - - **Pacing**: Alternating between fast-paced delivery and slow, emphatic moments. - - **Visual Cues**: You MUST write [VISUAL CUE] instructions. (e.g., [SHOW: Screen recording of X], [CUT TO: B-roll of Y]). - - **The "But... Therefore" Rule**: Avoid "And then... and then...". Use "But... therefore..." to create causal chains and tension. +## Formatting +- [BRACKETS] for directions. +- CAPITALS for emphasis. +- Short paragraphs; indicate tone shifts. - ## 3. 
THE PAYOFF & CALL TO ACTION (CTA) - - Deliver on the Hook's promise fully. - - **CALL TO ACTION (CTA)**: Do not beg. Give a logical reason to subscribe/click. (e.g., "If you want to see the advanced version of this, click here"). - - - - - Use [BRACKETS] for visual/audio directions. - - Use CAPITALS for emphasis on specific words. - - Keep paragraphs short (spoken word rhythm). - - Indicate tone shifts (e.g., (Whispering), (Excitedly)). - - `, +## Rules +- **Tool Efficiency**: Do NOT use the same tool repetitively or back-to-back for the same query. +`, providerOptions: { google: { thinkingConfig: { diff --git a/src/mastra/agents/stockAnalysisAgent.ts b/src/mastra/agents/stockAnalysisAgent.ts index d42784ab..9de82a51 100644 --- a/src/mastra/agents/stockAnalysisAgent.ts +++ b/src/mastra/agents/stockAnalysisAgent.ts @@ -50,128 +50,33 @@ export const stockAnalysisAgent = new Agent({ You are a Senior Stock Market Analyst with expertise in technical analysis, fundamental analysis, and investment strategy. - - ## SYSTEMATIC STOCK ANALYSIS FRAMEWORK - 1. **Define Scope:** Identify specific stocks and analysis timeframe for evaluation - 2. **Gather Data:** Collect price, fundamentals, news, and analyst data from multiple sources - 3. **Analyze Patterns:** Evaluate technical indicators, financial health, and market sentiment - 4. **Formulate Hypothesis:** Develop investment thesis based on comprehensive analysis - 5. **Test Hypothesis:** Validate against historical performance and current market conditions - 6. **Draw Conclusions:** Provide actionable recommendations with confidence scores - 7. 
**Reflect:** Consider alternative scenarios and risk factors for robustness - - - - ## MULTI-PATH VALIDATION PROTOCOL - - **Technical Analysis Path:** Chart patterns, indicators, trends, support/resistance levels - - **Fundamental Analysis Path:** Financial statements, valuation metrics, competitive position - - **Sentiment Analysis Path:** News sentiment, analyst ratings, social media trends - - **Quantitative Analysis Path:** Statistical models, risk metrics, correlation analysis - - **Cross-validate conclusions across all analytical frameworks** - - **Flag inconsistencies requiring further investigation** - - **Use ensemble methods to weight different analytical approaches** - - - - ## CAUSAL ANALYSIS CHAIN - - **Logical Validation:** Verify that each analytical step follows logically from previous steps - - **Reasoning Traceability:** Maintain clear audit trail of how conclusions were reached - - **Adaptive Depth Control:** Scale analysis depth based on stock complexity and user requirements - - **Hypothesis Testing:** Form and test specific hypotheses about stock performance - - **Counterfactual Analysis:** Consider what would happen if key assumptions change - - **Confidence Propagation:** Track how uncertainty accumulates through reasoning chains - - - - ## BRANCHING INVESTMENT ANALYSIS - - **Multiple Scenario Exploration:** Consider bullish, bearish, and sideways market scenarios - - **Quality Evaluation:** Assess analytical rigor and evidence strength for each branch - - **Optimal Path Selection:** Choose analysis approach based on market conditions and investment goals - - **Branch Pruning:** Eliminate low-probability scenarios while exploring high-potential ones - - **Synthesis Integration:** Combine insights from multiple analytical branches - - **Risk Assessment:** Evaluate potential outcomes across different market scenarios - - - - ## INVESTMENT UNCERTAINTY ASSESSMENT - - **High Confidence (80-100%):** Strong fundamentals + positive technicals + 
favorable sentiment - - **Medium Confidence (50-79%):** Mixed signals with some conflicting indicators - - **Low Confidence (20-49%):** Conflicting signals, high uncertainty, limited catalysts - - **Very Low Confidence (<20%):** Extreme uncertainty, recommend waiting for clearer signals - - **Evidence Evaluation:** Assess data quality, source reliability, and timeliness - - **Uncertainty Quantification:** Provide specific probability ranges for predictions - - **Decision Impact Assessment:** Consider consequences of different confidence levels - - - - ## SOURCE CREDIBILITY & FACTUAL VALIDATION - - **Authority Evaluation:** Prioritize data from SEC filings, reputable financial firms, exchanges - - **Recency Analysis:** Weight recent financials and news more heavily, flag outdated information - - **Cross-Validation:** Verify facts against multiple independent sources when possible - - **Bias Detection:** Identify potential conflicts of interest in analyst recommendations - - **Knowledge Integration:** Synthesize information across technical, fundamental, and sentiment data - - **Reasoning Validation:** Ensure conclusions are adequately supported by source evidence - - **Transparency:** Clearly cite sources and explain confidence in each data point - + + ## SYSTEMATIC STOCK ANALYSIS + 1. **Scope & Gather:** Identify stocks/timeframes; collect price, fundamentals, news, and sentiment data. + 2. **Analyze & Hypothesize:** Evaluate technicals (RSI, MACD), financials (P/E, revenue), and sentiment; develop investment thesis. + 3. **Validate & Reflect:** Cross-verify across Technical, Fundamental, Sentiment, and Quant paths; assess confidence (High/Med/Low). + 4. **Causal Analysis:** Ensure logical flow, maintain audit trails, and consider counterfactuals. + - ## PHASE 1: REAL-TIME DATA GATHERING (Execute Immediately) - MANDATORY FIRST TOOLS: - 1. 
**polygonStockQuotesTool**: Call with stock symbol to get current price, volume, market cap - WHEN: Always first - establishes baseline data - EXAMPLE: { symbol: "AAPL" } → Get $150.25 price, 52M volume - 2. **polygonStockAggregatesTool**: Get historical price action and volume trends - WHEN: After quotes - identify support/resistance levels - EXAMPLE: { symbol: "AAPL", range: "52week" } → $120 low, $185 high + ## PHASE 1: DATA GATHERING + 1. **polygonStockQuotesTool**: Get baseline price/volume. + 2. **polygonStockAggregatesTool**: Identify trends and support/resistance. - ## PHASE 2: TECHNICAL & FUNDAMENTAL ANALYSIS (Parallel Execution) - Technical Analysis: - - **alphaVantageStockTool**: RSI (overbought/oversold), MACD (momentum), Bollinger Bands - WHEN: Market showing strong trends or extreme valuations - EXAMPLE: RSI > 70 indicates overbought conditions - - **finnhubTechnicalTool**: Support/resistance patterns, key price levels - WHEN: Always for pattern confirmation after technical indicators - EXAMPLE: Strong support at $145, resistance at $160 + ## PHASE 2: ANALYSIS + - **Technical:** RSI/MACD via 'alphaVantageStockTool'; patterns via 'finnhubTechnicalTool'. + - **Fundamental:** P/E/Earnings via 'polygonStockFundamentalsTool'; Financials via 'finnhubFinancialsTool'. 
- Fundamental Analysis: - - **polygonStockFundamentalsTool**: P/E ratio, earnings, revenue - WHEN: For valuation comparison to sector peers - EXAMPLE: P/E of 28 vs sector average 22 = premium valuation - - **finnhubFinancialsTool**: Income statement, balance sheet, cash flow - WHEN: Assessing financial health and growth sustainability - EXAMPLE: Revenue growth 12% YoY, debt-to-equity 0.45 - - **finnhubCompanyTool**: Business model, market position, risks - WHEN: Context for why stock moves and forward outlook - - ## PHASE 3: SENTIMENT & RECOMMENDATION (Final Analysis) - - **googleFinanceTool**: Latest news, analyst sentiment, earnings calendar - WHEN: After technical/fundamental - provides context for recommendation - EXAMPLE: Earnings beat expected, analyst upgrades +3 - - **finnhubAnalysisTool**: Consensus ratings from Wall Street - WHEN: Triangulate your recommendation with professional consensus - EXAMPLE: 18 buy, 5 hold, 1 sell = strong buy consensus - - **finnhubQuotesTool**: Real-time bid-ask spread, intraday movement - WHEN: Final check - confirms current market condition before recommendation + ## PHASE 3: SENTIMENT & RECOMMENDATION + - **Sentiment:** News/Analyst ratings via 'googleFinanceTool' and 'finnhubAnalysisTool'. + - **Final:** Consensus check via 'finnhubQuotesTool'. 
+ - MANDATORY: - - Start with Polygon quote tools to establish baseline - - Combine technical + fundamental tools for signal confirmation - - Always cite specific numbers in recommendation - - Verify analysis across multiple data sources - - FORBIDDEN: - - Never recommend using only news/sentiment (use as confirmation only) - - Never skip fundamental analysis for technical only - - Never ignore major support/resistance levels - - WHEN PRICE CROSSES KEY LEVELS: - - Verify with fundamental metrics first - - Check analyst sentiment for confirmation - - Reassess risk metrics - - OUTPUT REQUIREMENTS: - Provide analysis as JSON with: symbol, currentPrice, technical analysis, fundamental analysis, sentiment, recommendation, priceTarget, risks, sources + - **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query. + - **Mandatory:** Start with Polygon quotes; combine technical + fundamental for confirmation. + - **Forbidden:** Never skip fundamental analysis; never rely solely on sentiment. + - **Output:** Provide JSON with symbol, currentPrice, technicals, fundamentals, sentiment, recommendation, priceTarget, risks, and sources. `, providerOptions: { diff --git a/src/mastra/agents/weather-agent.ts b/src/mastra/agents/weather-agent.ts index 92b9fe4d..3173bd0e 100644 --- a/src/mastra/agents/weather-agent.ts +++ b/src/mastra/agents/weather-agent.ts @@ -33,6 +33,8 @@ export const weatherAgent = new Agent({ - If the user asks for activities and provides the weather forecast, suggest activities based on the weather forecast. - If the user asks for activities, respond in the format they request. + - **Tool Efficiency:** Do NOT use the same tool repetitively or back-to-back for the same query. + Use the weatherTool to fetch current weather data. 
`, providerOptions: { diff --git a/src/mastra/config/pg-storage.ts b/src/mastra/config/pg-storage.ts index 14a0ed9a..0ccc6315 100644 --- a/src/mastra/config/pg-storage.ts +++ b/src/mastra/config/pg-storage.ts @@ -96,9 +96,9 @@ export const pgMemory = new Memory({ scope: 'resource', // 'resource' | 'thread' // HNSW index configuration to support high-dimensional embeddings (>2000 dimensions) indexConfig: { - type: 'flat', // flat index type (supports dimensions > 4000, unlike HNSW limit of 2000) + type: 'hnsw', // flat index type (supports dimensions > 4000, unlike HNSW limit of 2000) metric: 'cosine', // Distance metric for normalized embeddings - ivf: {lists: parseInt(process.env.LISTS ?? '3072')}, // IVF configuration for flat index + hnsw: { m: 16, efConstruction: 64 } // hnsw configuration }, threshold: 0.75, // Similarity threshold for semantic recall indexName: 'memory_messages_3072', // Index name for semantic recall @@ -108,36 +108,18 @@ export const pgMemory = new Memory({ enabled: true, scope: 'resource', // 'resource' | 'thread' version: 'vnext', - template: `# User Profile & Context -## Personal Information - - **Name**: - - **Role/Title**: - - **Organization**: - - **Location**: - - **Time Zone**: -## Communication Preferences - - **Preferred Communication Style**: - - **Response Length Preference**: - - **Technical Level**: - -## Current Context - - **Active Projects**: - - **Current Goals**: - - **Deadlines**: - - **Recent Activities**: - - **Pain Points**: - -## Long-term Memory - - **Key Achievements**: - - **Important Relationships**: - - **Recurring Patterns**: - - **Preferences & Habits**: - -## Session Notes - - **Today's Focus**: - - **Outstanding Questions**: - - **Action Items**: - - **Follow-ups Needed**: + template: `# User Context +## Profile +- Name/Role: +- Org/Loc: +- Style/Level: +## Active +- Goals/Projects: +- Recent/Deadlines: +## Insights +- Patterns/Habits: +- Session Focus: +- Action Items: `, }, } @@ -157,9 +139,9 @@ 
log.info('PG Store and Memory initialized with PgVector support', { }, scope: 'resource', indexConfig: { - type: 'flat', + type: 'hnsw', metric: 'cosine', - ivf: { lists: parseInt(process.env.LISTS ?? '2600') }, // Adjust list count based on your needs + hnsw: { m: 16, efConstruction: 64 } // hnsw configuration } }, workingMemory: { diff --git a/src/mastra/index.ts b/src/mastra/index.ts index ff981e60..7bf70bb9 100644 --- a/src/mastra/index.ts +++ b/src/mastra/index.ts @@ -94,7 +94,7 @@ export const mastra = new Mastra({ contentStudioWorkflow, changelogWorkflow, contentReviewWorkflow, - documentProcessingWorkflow, + documentProcessingWorkflow, financialReportWorkflow, learningExtractionWorkflow, researchSynthesisWorkflow, @@ -138,7 +138,7 @@ export const mastra = new Mastra({ dane, // Calendar and misc calendarAgent, -// bgColorAgent, + bgColorAgent, // Package publisher danePackagePublisher, // Financial Chart Agents @@ -175,13 +175,13 @@ export const mastra = new Mastra({ }, scorers: { }, -// mcpServers: { a2aCoordinator: a2aCoordinatorMcpServer, notes: notesMCP }, + mcpServers: { a2aCoordinator: a2aCoordinatorMcpServer, notes: notesMCP }, storage: new PostgresStore({ id: 'main-storage', // Connection configuration connectionString: - process.env.SUPABASE ?? + process.env.SUPABASE! ?? 'postgresql://user:password@localhost:5432/mydb', // Schema management schemaName: process.env.DB_SCHEMA ?? 
'mastra', @@ -264,29 +264,6 @@ export const mastra = new Mastra({ sendReasoning: true, sendSources: true, }), - chatRoute({ - path: "/chat/weatherAgent", - agent: "weatherAgent", - defaultOptions: { - memory: { - thread: { - id: "weatherAgentChat", - resourceId: 'chat', - metadata: { agent: 'weatherAgent' } - }, - resource: "chat", - options: - { lastMessages: 500, semanticRecall: true, workingMemory: { enabled: true, }, threads: { generateTitle: true } }, - readOnly: false, - }, - maxSteps: 50, - includeRawChunks: true, - }, - sendStart: true, - sendFinish: true, - sendReasoning: true, - sendSources: true, - }), chatRoute({ path: "/custom/researchAgent", agent: "researchAgent", @@ -310,85 +287,63 @@ export const mastra = new Mastra({ sendReasoning: true, sendSources: true, }), - chatRoute({ - path: "/chat/bgColorAgent", - agent: "bgColorAgent", - defaultOptions: { - memory: { - thread: { - id: 'bgColorAgent', - resourceId: 'bgColorAgent', - }, - resource: "bgColorAgent", - options: - { lastMessages: 500, semanticRecall: true, workingMemory: { enabled: true, }, threads: { generateTitle: true } }, - readOnly: false, - }, - maxSteps: 50, - includeRawChunks: true, - }, - sendStart: true, - sendFinish: true, - sendReasoning: true, - sendSources: true, - }), ], middleware: [ // Populate RequestContext with real runtime values derived from headers (used by agents/tools) -// async (c, next) => { -// const country = c.req.header('CF-IPCountry') ?? '' -// const authHeader = c.req.header('Authorization') ?? '' -// const headerUserId = c.req.header('x-user-id') -// const headerUserTier = c.req.header('x-user-tier') -// const acceptLanguage = c.req.header('accept-language') ?? '' -// const researchPhaseHeader = c.req.header('x-research-phase') + async (c, next) => { + const country = c.req.header('CF-IPCountry') ?? '' + const authHeader = c.req.header('Authorization') ?? 
'' + const headerUserId = c.req.header('x-user-id') + const headerUserTier = c.req.header('x-user-tier') + const acceptLanguage = c.req.header('accept-language') ?? '' + const researchPhaseHeader = c.req.header('x-research-phase') -// const requestContext = c.get('requestContext') as RequestContext | undefined -// if (requestContext?.set) { + const requestContext = c.get('requestContext') as RequestContext | undefined + if (requestContext?.set) { // // Temperature unit (from Cloudflare geo header) -// const unit = country === 'US' ? 'fahrenheit' : 'celsius' -// requestContext.set('temperature-unit', unit) + const unit = country === 'US' ? 'fahrenheit' : 'celsius' + requestContext.set('temperature-unit', unit) - // userId: prefer explicit header, otherwise try to parse from a bearer token (format: "Bearer user:") -// let userId = headerUserId -// if (!userId && authHeader !== null && authHeader !== '' && authHeader.startsWith('Bearer ')) { -// const token = authHeader.slice('Bearer '.length) -// const exec = /user:([^;\s]+)/.exec(token) -// if (exec) { userId = exec[1] } -// } -// if (userId !== null && userId !== '') { requestContext.set('userId', userId) } + // userId: prefer explicit header, otherwise try to parse from a bearer token (format: "Bearer user:") + let userId = headerUserId + if (!userId && authHeader !== null && authHeader !== '' && authHeader.startsWith('Bearer ')) { + const token = authHeader.slice('Bearer '.length) + const exec = /user:([^;\s]+)/.exec(token) + if (exec) { userId = exec[1] } + } + if (userId !== undefined && userId !== '') { requestContext.set('userId', userId) } // user-tier: prefer explicit header, otherwise derive from token hints -// let userTier = headerUserTier -// if (!userTier && authHeader !== null && authHeader !== '' && authHeader.startsWith('Bearer ')) { -// const token = authHeader.slice('Bearer '.length) -// if (token.includes('enterprise')) { userTier = 'enterprise' } -// else if (token.includes('pro')) { userTier = 'pro' 
} -// else { userTier = 'free' } -// } -// if (userTier !== null && userTier !== '') { requestContext.set('user-tier', userTier) } + let userTier = headerUserTier + if (!userTier && authHeader !== null && authHeader !== '' && authHeader.startsWith('Bearer ')) { + const token = authHeader.slice('Bearer '.length) + if (token.includes('enterprise')) { userTier = 'enterprise' } + else if (token.includes('pro')) { userTier = 'pro' } + else { userTier = 'free' } + } + if (userTier !== undefined && userTier !== '') { requestContext.set('user-tier', userTier) } // language: prefer Accept-Language header (primary language subtag), fallback to 'en' -// const language = acceptLanguage.split(',')[0]?.split('-')[0] ?? 'en' -// const supported = ['en', 'es', 'ja', 'fr'] -// requestContext.set('language', supported.includes(language) ? language : 'en') + const language = acceptLanguage.split(',')[0]?.split('-')[0] ?? 'en' + const supported = ['en', 'es', 'ja', 'fr'] + requestContext.set('language', supported.includes(language) ? language : 'en') // research phase -// requestContext.set('researchPhase', researchPhaseHeader ?? 
'initial') // runtime API key (for tools that may accept runtimeContext.apiKey) - // if (apiKeyHeader !== null && apiKeyHeader !== '') { requestContext.set('apiKey', apiKeyHeader) } -// } +// if (apiKeyHeader !== null && apiKeyHeader !== '') { requestContext.set('apiKey', apiKeyHeader) } + } -// await next() -// }, - // Request timing logger -// async (c, next) => { -// const start = Date.now() -// await next() -// const duration = Date.now() - start - // log.info(`${c.req.method} ${c.req.url} - ${duration}ms`) - // }, + await next() + }, + //Request timing logger + async (c, next) => { + const start = Date.now() + await next() + const duration = Date.now() - start + log.info(`${c.req.method} ${c.req.url} - ${duration}ms`) + }, ] } }); diff --git a/src/mastra/tools/web-scraper-tool.ts b/src/mastra/tools/web-scraper-tool.ts index 36a9c663..b9b104bc 100644 --- a/src/mastra/tools/web-scraper-tool.ts +++ b/src/mastra/tools/web-scraper-tool.ts @@ -377,6 +377,43 @@ export class ValidationUtils { } // Input Schema +const crawlingOptionsSchema = z.object({ + depth: z.number().min(1).max(5).optional().describe('Maximum crawling depth for following links (default: 1, max: 5).'), + maxPages: z.number().min(1).max(100).optional().describe('Maximum number of pages to crawl (default: 1, max: 100).'), + followLinks: z.boolean().optional().describe('Whether to follow and crawl internal links (default: false).'), + respectRobotsTxt: z.boolean().optional().describe('Whether to check and respect robots.txt (default: true).'), + delayBetweenRequests: z.number().min(0).max(10000).optional().describe('Delay between requests in milliseconds for rate limiting (default: 1000).'), +}).optional(); + +const extractionOptionsSchema = z.object({ + includeImages: z.boolean().optional().describe('Whether to extract image URLs and metadata (default: false).'), + extractMetadata: z.boolean().optional().describe('Whether to extract page metadata (title, description, keywords, etc.) 
(default: true).'), + contentType: z.enum(['text', 'links', 'images', 'metadata', 'structured', 'all']).optional().describe('Type of content to prioritize extracting (default: "all").'), + extractStructuredData: z.boolean().optional().describe('Whether to extract structured data (JSON-LD, microdata, RDFa) (default: false).'), + languageDetection: z.boolean().optional().describe('Whether to detect and return content language (default: false).'), + contentFiltering: z.object({ + minLength: z.number().optional(), + maxLength: z.number().optional(), + keywords: z.array(z.string()).optional(), + excludePatterns: z.array(z.string()).optional(), + }).optional().describe('Content filtering options (min/max length, keywords, exclude patterns).'), +}).optional(); + +const requestOptionsSchema = z.object({ + timeout: z.number().min(1000).max(60000).optional().describe('Request timeout in milliseconds (default: 30000, max: 60000).'), + userAgent: z.string().optional().describe('Custom User-Agent string for requests.'), + headers: z.record(z.string(), z.string()).optional().describe('Custom HTTP headers to send with requests.'), + retryAttempts: z.number().min(0).max(5).optional().describe('Number of retry attempts for failed requests (default: 2, max: 5).'), + compression: z.boolean().optional().describe('Whether to handle compressed responses (gzip, deflate) (default: true).'), + cookies: z.record(z.string(), z.string()).optional().describe('Custom cookies to send with requests.'), +}).optional(); + +const storageOptionsSchema = z.object({ + saveMarkdown: z.boolean().optional().describe('Whether to save the scraped content as markdown to the data directory.'), + markdownFileName: z.string().optional().describe('Optional filename for the markdown file (relative to data/ directory).'), + outputFormat: z.enum(['json', 'markdown', 'html', 'text']).optional().describe('Output format for extracted content (default: "json").'), +}).optional(); + const webScraperInputSchema = z 
.object({ url: z @@ -397,106 +434,10 @@ const webScraperInputSchema = z .describe( "Array of HTML attributes to extract from selected elements (e.g., 'href', 'src', 'alt')." ), - saveMarkdown: z - .boolean() - .optional() - .describe( - 'Whether to save the scraped content as markdown to the data directory.' - ), - markdownFileName: z - .string() - .optional() - .describe( - 'Optional filename for the markdown file (relative to data/ directory). If not provided, a default name will be generated.' - ), - // Enhanced search tool options - depth: z - .number() - .min(1) - .max(5) - .optional() - .describe('Maximum crawling depth for following links (default: 1, max: 5).'), - maxPages: z - .number() - .min(1) - .max(100) - .optional() - .describe('Maximum number of pages to crawl (default: 1, max: 100).'), - followLinks: z - .boolean() - .optional() - .describe('Whether to follow and crawl internal links (default: false).'), - includeImages: z - .boolean() - .optional() - .describe('Whether to extract image URLs and metadata (default: false).'), - extractMetadata: z - .boolean() - .optional() - .describe('Whether to extract page metadata (title, description, keywords, etc.) 
(default: true).'), - contentType: z - .enum(['text', 'links', 'images', 'metadata', 'structured', 'all']) - .optional() - .describe('Type of content to prioritize extracting (default: "all").'), - timeout: z - .number() - .min(1000) - .max(60000) - .optional() - .describe('Request timeout in milliseconds (default: 30000, max: 60000).'), - userAgent: z - .string() - .optional() - .describe('Custom User-Agent string for requests.'), - headers: z - .record(z.string(), z.string()) - .optional() - .describe('Custom HTTP headers to send with requests.'), - retryAttempts: z - .number() - .min(0) - .max(5) - .optional() - .describe('Number of retry attempts for failed requests (default: 2, max: 5).'), - delayBetweenRequests: z - .number() - .min(0) - .max(10000) - .optional() - .describe('Delay between requests in milliseconds for rate limiting (default: 1000).'), - respectRobotsTxt: z - .boolean() - .optional() - .describe('Whether to check and respect robots.txt (default: true).'), - extractStructuredData: z - .boolean() - .optional() - .describe('Whether to extract structured data (JSON-LD, microdata, RDFa) (default: false).'), - languageDetection: z - .boolean() - .optional() - .describe('Whether to detect and return content language (default: false).'), - contentFiltering: z - .object({ - minLength: z.number().optional(), - maxLength: z.number().optional(), - keywords: z.array(z.string()).optional(), - excludePatterns: z.array(z.string()).optional(), - }) - .optional() - .describe('Content filtering options (min/max length, keywords, exclude patterns).'), - outputFormat: z - .enum(['json', 'markdown', 'html', 'text']) - .optional() - .describe('Output format for extracted content (default: "json").'), - compression: z - .boolean() - .optional() - .describe('Whether to handle compressed responses (gzip, deflate) (default: true).'), - cookies: z - .record(z.string(), z.string()) - .optional() - .describe('Custom cookies to send with requests.'), + crawling: 
crawlingOptionsSchema, + extraction: extractionOptionsSchema, + request: requestOptionsSchema, + storage: storageOptionsSchema, }) .strict() @@ -504,27 +445,6 @@ const webScraperInputSchema = z const webScraperOutputSchema = z .object({ url: z.url().describe('The URL that was scraped.'), - extractedData: z - .array(z.record(z.string(), z.string())) - .describe( - 'Array of extracted data, where each object represents an element and its extracted attributes/text.' - ), - rawContent: z - .string() - .optional() - .describe( - 'The full raw HTML content of the page (if no selector is provided).' - ), - markdownContent: z - .string() - .optional() - .describe('The scraped content converted to markdown format.'), - savedFilePath: z - .string() - .optional() - .describe( - 'Path to the saved markdown file (if saveMarkdown was true).' - ), status: z .string() .describe( @@ -534,27 +454,53 @@ const webScraperOutputSchema = z .string() .optional() .describe('Error message if the operation failed.'), - // Enhanced output fields - metadata: z - .record(z.string(), z.string()) - .optional() - .describe('Page metadata (title, description, keywords, etc.) extracted from meta tags.'), - images: z - .array(z.object({ - src: z.string(), - alt: z.string().optional(), - title: z.string().optional(), - })) - .optional() - .describe('Array of image URLs and metadata found on the page.'), - structuredData: z - .array(z.unknown()) - .optional() - .describe('Structured data extracted from JSON-LD and microdata.'), - detectedLanguage: z - .string() - .optional() - .describe('Detected content language from HTML lang attribute or meta tags.'), + content: z.object({ + extractedData: z + .array(z.record(z.string(), z.string())) + .describe( + 'Array of extracted data, where each object represents an element and its extracted attributes/text.' + ), + rawContent: z + .string() + .optional() + .describe( + 'The full raw HTML content of the page (if no selector is provided).' 
+ ), + markdownContent: z + .string() + .optional() + .describe('The scraped content converted to markdown format.'), + }).describe('Scraped content results.'), + storage: z.object({ + savedFilePath: z + .string() + .optional() + .describe( + 'Path to the saved markdown file (if saveMarkdown was true).' + ), + }).optional().describe('Storage information.'), + analysis: z.object({ + metadata: z + .record(z.string(), z.string().optional()) + .optional() + .describe('Page metadata (title, description, keywords, etc.) extracted from meta tags.'), + images: z + .array(z.object({ + src: z.string(), + alt: z.string().optional(), + title: z.string().optional(), + })) + .optional() + .describe('Array of image URLs and metadata found on the page.'), + structuredData: z + .array(z.unknown()) + .optional() + .describe('Structured data extracted from JSON-LD and microdata.'), + detectedLanguage: z + .string() + .optional() + .describe('Detected content language from HTML lang attribute or meta tags.'), + }).optional().describe('Analysis results.'), }) .strict() @@ -590,7 +536,7 @@ export const webScraperTool = createTool({ log.info('Starting enhanced web scraping with JSDOM', { url: inputData.url, selector: inputData.selector, - saveMarkdown: inputData.saveMarkdown, + saveMarkdown: inputData.storage?.saveMarkdown, }) let rawContent: string | undefined @@ -609,24 +555,24 @@ export const webScraperTool = createTool({ try { const headers: Record = { - ...(inputData.headers ?? {}), + ...(inputData.request?.headers ?? {}), }; - if (typeof inputData.userAgent === 'string' && inputData.userAgent.trim() !== '') { - headers['user-agent'] = inputData.userAgent; + if (typeof inputData.request?.userAgent === 'string' && inputData.request.userAgent.trim() !== '') { + headers['user-agent'] = inputData.request.userAgent; } - const maxDepth = inputData.depth ?? 1; - const maxPages = inputData.maxPages ?? 1; - const followLinks = inputData.followLinks ?? 
false; - const retryAttempts = inputData.retryAttempts ?? 2; - const delayBetweenRequests = inputData.delayBetweenRequests ?? 1000; + const maxDepth = inputData.crawling?.depth ?? 1; + const maxPages = inputData.crawling?.maxPages ?? 1; + const followLinks = inputData.crawling?.followLinks ?? false; + const retryAttempts = inputData.request?.retryAttempts ?? 2; + const delayBetweenRequests = inputData.crawling?.delayBetweenRequests ?? 1000; const crawler = new CheerioCrawler({ maxRequestsPerCrawl: followLinks ? Math.min(maxPages, 50) : 1, maxConcurrency: 10, maxRequestRetries: retryAttempts, sameDomainDelaySecs: delayBetweenRequests / 1000, - requestHandlerTimeoutSecs: (inputData?.timeout ?? 30000) / 1000, + requestHandlerTimeoutSecs: (inputData.request?.timeout ?? 30000) / 1000, async requestHandler({ request, body, response, enqueueLinks }) { try { scrapedUrl = request.url @@ -694,7 +640,7 @@ export const webScraperTool = createTool({ } // Extract metadata if requested - if (inputData.extractMetadata !== false) { + if (inputData.extraction?.extractMetadata !== false) { const dom = new JSDOM(rawContent, { includeNodeLocations: false }) const { document } = dom.window @@ -709,7 +655,7 @@ export const webScraperTool = createTool({ } // Extract images if requested - if (inputData.includeImages ?? false) { + if (inputData.extraction?.includeImages ?? false) { const dom = new JSDOM(rawContent, { includeNodeLocations: false }) const { document } = dom.window const imgElements = document.querySelectorAll('img') @@ -727,7 +673,7 @@ export const webScraperTool = createTool({ } // Extract structured data if requested - if (inputData.extractStructuredData ?? false) { + if (inputData.extraction?.extractStructuredData ?? false) { const dom = new JSDOM(rawContent, { includeNodeLocations: false }) const { document } = dom.window @@ -767,7 +713,7 @@ export const webScraperTool = createTool({ } // Language detection (basic) - if (inputData.languageDetection ?? 
false) { + if (inputData.extraction?.languageDetection ?? false) { const dom = new JSDOM(rawContent, { includeNodeLocations: false }) const { document } = dom.window detectedLanguage = document.documentElement.getAttribute('lang') ?? @@ -899,16 +845,16 @@ export const webScraperTool = createTool({ await writer?.custom({ type: 'data-tool-progress', data: { status: 'in-progress', message: '✂️ Converting to markdown...', stage: 'web:scraper' }, id: 'web:scraper' }); if ( - inputData.saveMarkdown === true && + inputData.storage?.saveMarkdown === true && typeof markdownContent === 'string' && markdownContent.trim() !== '' ) { try { const fileName = - typeof inputData.markdownFileName === 'string' && - inputData.markdownFileName.trim() !== '' + typeof inputData.storage?.markdownFileName === 'string' && + inputData.storage.markdownFileName.trim() !== '' ? ValidationUtils.sanitizeFileName( - inputData.markdownFileName + inputData.storage.markdownFileName ) : `scraped_${new Date().toISOString().replace(/[:.]/g, '-')}.md` @@ -949,27 +895,31 @@ export const webScraperTool = createTool({ } } -await writer?.custom({ type: 'data-tool-progress', data: { status: 'done', message: `✅ Scraping complete: ${extractedData.length} elements${(savedFilePath !== null) ? ', saved to ' + savedFilePath : ''}`, stage: 'web:scraper' }, id: 'web:scraper' }); +await writer?.custom({ type: 'data-tool-progress', data: { status: 'done', message: `✅ Scraping complete: ${extractedData.length} elements${(typeof savedFilePath === 'string') ? ', saved to ' + savedFilePath : ''}`, stage: 'web:scraper' }, id: 'web:scraper' }); return webScraperOutputSchema.parse({ url: scrapedUrl, - extractedData, - rawContent: - inputData.selector !== null - ? undefined - : typeof rawContent === 'string' && - rawContent.trim().length > 0 - ? rawContent - : undefined, - markdownContent, - savedFilePath, status, errorMessage, - metadata: Object.keys(metadata).length > 0 ? metadata : undefined, - images: images.length > 0 ? 
images : undefined, - structuredData: structuredData.length > 0 ? structuredData : undefined, - detectedLanguage, + content: { + extractedData, + rawContent: + (typeof inputData.selector === 'string' && inputData.selector.trim() !== '') + ? undefined + : typeof rawContent === 'string' && + rawContent.trim().length > 0 + ? rawContent + : undefined, + markdownContent, + }, + storage: typeof savedFilePath === 'string' ? { savedFilePath } : undefined, + analysis: { + metadata: Object.keys(metadata).length > 0 ? metadata : undefined, + images: images.length > 0 ? images : undefined, + structuredData: structuredData.length > 0 ? structuredData : undefined, + detectedLanguage, + } }) } catch (error) { errorMessage = `Web scraping failed: ${error instanceof Error ? error.message : String(error)}`