You need an `EXA_API_KEY` to run this notebook; get one at [exa.ai](https://exa.ai).

In [1]:
from IPython.display import display, Markdown

import json

from lionagi import iModel, Branch, types, BaseModel
from lionagi.service.providers.types import ExaSearchRequest
from lionagi.utils import alcall


# Structured-output schema for the batch of Exa search requests; it is passed
# as `response_format` in step 3 of `research`.
# NOTE(review): documented with `#` comments rather than a docstring because a
# class docstring would presumably end up in the schema sent to the model.
class SearchRequests(BaseModel):
    # Zero or more requests; each one is dumped to keyword arguments for the
    # `exa` API call.
    search_requests: list[ExaSearchRequest] = []


# Structured-output schema for step 2 of `research` (passed as
# `response_format`).
class Analysis(BaseModel):
    # Free-text analysis; shown in the step 2 verbose output.
    analysis: str


# One cited reference of a `ResearchDraft`.
class Source(BaseModel):
    # Link text and link target; rendered as `[title](url)` in the verbose
    # output of `research`.
    title: str
    url: str


# Structured-output schema for the final report produced in step 4 of
# `research` (passed as `response_format`).
class ResearchDraft(BaseModel):
    title: str
    # Report body; shown in the step 4 verbose output.
    content: str
    # References backing the report (note: the field name is singular but it
    # holds a list).
    source: list[Source]


# Shared Exa search model used by `research`.
# An EXA_API_KEY is required; get one at exa.ai.
# NOTE(review): presumably the key is picked up from the environment by
# lionagi -- confirm.
exa = iModel(
    provider="exa",
    endpoint="search",
    # NOTE(review): these two look like request-throttling settings (queue size
    # and refresh interval) -- confirm units and semantics in the lionagi docs.
    queue_capacity=5,
    capacity_refresh_time=1,
    invoke_with_endpoint=False,
)


async def research(
    branch: Branch,
    query: str,
    domain: str | None = None,
    style: str | None = None,
    sample_writing: str | None = None,
    interpret_kwargs: dict | None = None,
    *,
    verbose: bool = False,
):
    """
    High-level research operation with optional verbose printing:
      1) Interpret (rewrite) the user query.
      2) Generate an analysis from the LLM.
      3) Produce search requests and run them against the module-level
         ``exa`` model (``is_cached=True`` unless a request says otherwise).
      4) Draft a final report from the raw search results.

    Returns an OperableModel. Fields are added as each step completes:
      analysis, search_requests, search_results, research_draft.
    If any step raises, the exception text is stored in an ``error`` field,
    a message is printed, and the fields collected so far are returned; the
    exception is not re-raised.

    Parameters
    ----------
    branch : Branch
        The branch instance handling the conversation / context.
    query : str
        The user's research query.
    domain : Optional[str]
        Domain hint (e.g. "finance", "marketing").
    style : Optional[str]
        Style hint (e.g. "concise", "technical"). When omitted, the draft
        instruction simply asks for a "research report".
    sample_writing : Optional[str]
        A sample snippet that might help interpret the style or structure.
    interpret_kwargs : Optional[dict]
        Additional parameters for the `branch.interpret()` call.
    verbose : bool
        If True, displays intermediate results step-by-step as Markdown.
    """

    out = types.OperableModel()
    try:
        interpret_kwargs = interpret_kwargs or {}

        # -- Step 1: Interpret the query for better clarity --
        interpreted = await branch.interpret(
            text=query,
            guidance="Rewrite the user input to ensure we fully understand and clarify the user's objective.",
            domain=domain,
            style=style,
            sample=sample_writing,
            **interpret_kwargs,
        )
        if verbose:
            display(Markdown(f"**[Step 1] Interpreted query:** {interpreted}"))

        # -- Step 2: Generate analysis from the LLM --
        analysis = await branch.operate(
            instruction=interpreted,
            guidance=(
                "Perform a thorough analysis focusing on domain knowledge, "
                "potential angles, and constraints. Be concise but complete."
            ),
            response_format=Analysis,
            reason=True,
        )
        out.add_field("analysis", analysis, annotation=Analysis)
        if verbose:
            display(
                Markdown(f"**[Step 2] Analysis result:** {analysis.analysis}")
            )

        # -- Step 3: Produce search requests based on the analysis --
        # Each fragment ends with a space so that implicit concatenation does
        # not glue two sentences together.
        search_requests: SearchRequests = await branch.operate(
            instruction=(
                "Based on the analysis, produce a list of relevant search requests "
                "for the EXA provider. Focus on the key points from the analysis. "
                "make sure you get sufficient information from the search results. "
                "should get summaries of the articles as well as the sufficient text "
                "from the articles. exclude sites like reddit, or other low quality sources."
            ),
            guidance=(
                "Generate specific queries that capture the key aspects from the analysis. "
                "Provide enough detail for each request."
            ),
            response_format=SearchRequests,
            reason=True,
        )
        out.add_field(
            "search_requests", search_requests, annotation=SearchRequests
        )
        if verbose:
            display(Markdown("**[Step 3] Search requests:**"))
            for request in search_requests.search_requests:
                display(
                    Markdown(
                        request.model_dump_json(exclude_none=True, indent=2)
                    )
                )

        # Prepare API calls
        api_calls = []
        for request in search_requests.search_requests:
            params = request.model_dump(exclude_none=True)
            # Ensure we cache search results by default
            params.setdefault("is_cached", True)
            api_calls.append(exa.create_api_calling(**params))

        # Invoke EXA searches asynchronously
        search_results = await alcall(
            api_calls, exa.invoke, retry_default=None, dropna=True
        )
        # Collected once; reused for the output field, the verbose listing
        # and the drafting context.
        responses = [res.response for res in search_results]
        out.add_field(
            "search_results",
            responses,
            annotation=list[dict],
        )
        if verbose:
            display(Markdown("**[Step 3] Search results:**"))
            for response in responses:
                # Tolerate a response without "results" so that a display-only
                # problem cannot abort the drafting step.
                titles = [
                    "  - " + item["title"]
                    for item in (response or {}).get("results", [])
                    if item.get("title")
                ]
                display(Markdown("\n".join(titles)))

        # -- Step 4: Draft a final output referencing the search results --
        # Only mention the style when one was given; otherwise the literal
        # text "None" would end up in the instruction.
        style_phrase = f"{style} " if style else ""
        draft = await branch.operate(
            instruction=(
                f"Prepare a well-formatted, factual {style_phrase}research report basing on the research findings. "
                "Incorporate key insights from the context."
            ),
            guidance=(
                "Synthesize a final report using all insights gleaned from the search results. "
                "Ensure clarity and accuracy, and follow any requested style. "
                "answer the user's question, and provide additional context."
            ),
            context={"search_results": json.dumps(responses)},
            response_format=ResearchDraft,
        )
        out.add_field("research_draft", draft, annotation=ResearchDraft)
        if verbose:
            display(Markdown(f"**[Step 4] Draft:** \n\n{draft.content}"))
            for source in draft.source:
                display(Markdown(f"**Source:** [{source.title}]({source.url})"))
    except Exception as e:
        # Deliberate best-effort boundary: keep whatever was collected so far
        # and report the failure through the returned model.
        out.add_field("error", str(e), annotation=str)
        print("Error occurred during research:", e)

    return out

In [2]:
# System prompt for the researcher branch; passed as `system=` when the
# `Branch` is constructed below.
researcher_prompt = """SYSTEM PROMPT (Researcher):
You are a specialized research assistant, trained to gather information from various sources accurately and concisely. Your job involves:
 • Interpreting user questions and clarifying objectives,
 • Proposing relevant angles or methods of inquiry,
 • Generating precise search queries to explore any topic,
 • Summarizing findings accurately while preserving key details.

When performing your tasks:
 • Confirm context and constraints (like domain or style requirements).
 • Provide well-structured, consistent, and thorough analyses.
 • Use suitable search queries to gather relevant info.
 • Summarize or compress results in a way that remains factual.
 • Maintain an objective, knowledgeable, and professional tone.

Overall, your responsibility is to produce high-quality research findings and drafts that help the user solve problems or gather insights effectively."""

In [3]:
# Example usage of `research` function

from lionagi.session.branch import Branch

# Two Claude 3.5 models served through OpenRouter: `haiku` is wired in as the
# branch's parse model and `sonnet` as its chat model (see `researcher` below).
haiku = iModel(
    provider="openrouter",
    model="anthropic/claude-3.5-haiku",
    max_tokens=8000,  # required for anthropic models
    invoke_with_endpoint=False,
)

sonnet = iModel(
    provider="openrouter",
    model="anthropic/claude-3.5-sonnet",
    max_tokens=8000,  # required for anthropic models
    invoke_with_endpoint=False,
)

# The branch that `research` drives; it carries the researcher system prompt.
researcher = Branch(
    system=researcher_prompt,
    chat_model=sonnet,
    parse_model=haiku,
)

# Example query requesting an analysis of LLM-based summarization in finance.
# Passed as `query=` to `research`.
query_text = (
    "I want to compare different LLM-based summarization approaches "
    "for financial documents. Focus on accuracy, cost, and domain adaptability. "
    "Also highlight practical use-cases or references."
)

# Optional style or domain hints, passed as `domain=`, `style=` and
# `sample_writing=` to `research`.
domain_hint = "finance"
style_hint = "extensive"
sample_snippet = (
    "Sample text: In the finance domain, we often handle massive amounts of data. "
    "We want a method that can summarize quickly and accurately."
)

In [4]:
# Run the research workflow end to end. The top-level `await` relies on the
# notebook / IPython event loop; in a plain script this call has to be wrapped
# in a coroutine. On failure `research` prints the error and returns the
# partial result instead of raising.
result = await research(
    branch=researcher,
    query=query_text,
    domain=domain_hint,
    style=style_hint,
    sample_writing=sample_snippet,
    interpret_kwargs={"temperature": 0.3},  # example
    verbose=True,
)

**[Step 1] Interpreted query:** Here's the clarified and structured prompt:

Please provide a comprehensive analysis of different Large Language Model (LLM) approaches for summarizing financial documents, addressing the following key aspects:

1. Comparison of summarization methods:
   - Traditional LLM summarization techniques
   - Finance-specific LLM models and fine-tuning approaches
   - Hybrid approaches combining LLMs with rule-based systems

2. Evaluation criteria:
   - Accuracy metrics (precision, recall, F1 score) for financial content
   - Cost analysis (computational resources, API costs, training expenses)
   - Domain adaptation capabilities for different financial document types (earnings reports, SEC filings, research papers)

3. Practical considerations:
   - Real-world implementation examples in financial institutions
   - Handling of specialized financial terminology and numerical data
   - Compliance and regulatory requirements

4. Use cases and applications:
   - Investment research automation
   - Regulatory filing analysis
   - Financial news summarization
   - Risk assessment documentation

Please include relevant academic papers, industry case studies, or benchmarks that support the analysis. Also address any limitations or challenges specific to financial domain summarization.

**[Step 2] Analysis result:** Domain Knowledge Analysis:
1. Financial Document Characteristics:
- Heavy use of numerical data and financial metrics
- Specialized terminology and regulatory language
- Strict accuracy requirements due to compliance
- Multiple document types with varying structures

2. Technical Approaches:
a) Traditional LLM Methods:
- Base models like GPT, BERT
- Pros: General text understanding
- Cons: Miss financial nuances

b) Finance-Specific Models:
- FinBERT, FinGPT
- Domain-adapted architectures
- Better financial terminology handling

c) Hybrid Systems:
- Rule-based + LLM combination
- Enhanced numerical accuracy
- Structured data extraction

3. Key Constraints:
- Regulatory compliance (SEC, FINRA)
- Data privacy requirements
- Real-time processing needs
- Cost-effectiveness at scale

4. Success Metrics:
- Factual accuracy preservation
- Numerical precision
- Context retention
- Processing speed

5. Implementation Considerations:
- API integration requirements
- Model hosting infrastructure
- Data security protocols
- Monitoring systems

**[Step 3] Search requests:**

{
  "query": "comparison of finance-specific LLMs vs traditional LLMs for financial document summarization benchmarks AND performance metrics",
  "category": "research paper",
  "type": "neural",
  "useAutoprompt": false,
  "numResults": 15,
  "excludeDomains": [
    "reddit.com",
    "medium.com"
  ],
  "startPublishedDate": "2022-01-01T00:00:00.000Z",
  "contents": {
    "text": {
      "includeHtmlTags": false,
      "maxCharacters": 10000
    },
    "highlights": {
      "highlightsPerUrl": 2,
      "numSentences": 3
    },
    "summary": {},
    "livecrawl": "never",
    "livecrawlTimeout": 10000
  }
}

{
  "query": "financial document summarization hybrid approaches combining LLMs with rule-based systems implementations case studies",
  "category": "research paper",
  "type": "neural",
  "useAutoprompt": false,
  "numResults": 10,
  "excludeDomains": [
    "reddit.com",
    "medium.com"
  ],
  "startPublishedDate": "2021-01-01T00:00:00.000Z",
  "contents": {
    "text": {
      "includeHtmlTags": false,
      "maxCharacters": 8000
    },
    "highlights": {
      "highlightsPerUrl": 1,
      "numSentences": 3
    },
    "summary": {},
    "livecrawl": "never",
    "livecrawlTimeout": 10000
  }
}

{
  "query": "regulatory compliance requirements for LLM financial document summarization SEC FINRA guidelines",
  "category": "news",
  "type": "neural",
  "useAutoprompt": false,
  "numResults": 10,
  "excludeDomains": [
    "reddit.com",
    "medium.com",
    "quora.com"
  ],
  "startPublishedDate": "2022-01-01T00:00:00.000Z",
  "contents": {
    "text": {
      "includeHtmlTags": false,
      "maxCharacters": 5000
    },
    "highlights": {
      "highlightsPerUrl": 1,
      "numSentences": 2
    },
    "summary": {},
    "livecrawl": "never",
    "livecrawlTimeout": 10000
  }
}

{
  "query": "FinBERT FinGPT financial domain adaptation techniques evaluation metrics accuracy",
  "category": "research paper",
  "type": "neural",
  "useAutoprompt": false,
  "numResults": 12,
  "excludeDomains": [
    "reddit.com",
    "medium.com"
  ],
  "startPublishedDate": "2021-01-01T00:00:00.000Z",
  "contents": {
    "text": {
      "includeHtmlTags": false,
      "maxCharacters": 8000
    },
    "highlights": {
      "highlightsPerUrl": 1,
      "numSentences": 3
    },
    "summary": {},
    "livecrawl": "never",
    "livecrawlTimeout": 10000
  }
}

**[Step 3] Search results:**

  - FinanceBench: A New Benchmark for Financial Question Answering
  - L3iTC at the FinLLM Challenge Task: Quantization for Financial Text Classification & Summarization
  - DocFinQA: A Long-Context Financial Reasoning Dataset
  - Numerical Reasoning for Financial Reports
  - Towards Optimizing the Costs of LLM Usage
  - Zero-Shot Question Answering over Financial Documents using Large Language Models
  - Building Real-World Meeting Summarization Systems using Large Language Models: A Practical Perspective
  - LongFin: A Multimodal Document Understanding Model for Long Financial Domain Documents
  - BizBench: A Quantitative Reasoning Benchmark for Business and Finance
  - Improving Zero-Shot Text Matching for Financial Auditing with Large Language Models
  - Multimodal Gen-AI for Fundamental Investment Research
  - Is ChatGPT a Financial Expert? Evaluating Language Models on Financial Natural Language Processing
  - Leveraging Long-Context Large Language Models for Multi-Document Understanding and Summarization in Enterprise Applications
  - Beyond Classification: Financial Reasoning in State-of-the-Art Language Models
  - A Comparative Analysis of Fine-Tuned LLMs and Few-Shot Learning of LLMs for Financial Sentiment Analysis

  - Information Extraction through AI techniques: The KIDs use case at CONSOB
  - Numerical Reasoning for Financial Reports
  - An AI-based Approach for Tracing Content Requirements in Financial Documents
  - NLP-based Decision Support System for Examination of Eligibility Criteria from Securities Prospectuses at the German Central Bank
  - Natural Language Processing for Financial Regulation
  - DocFinQA: A Long-Context Financial Reasoning Dataset
  - LongFin: A Multimodal Document Understanding Model for Long Financial Domain Documents
  - Leveraging LLMs for KPIs Retrieval from Hybrid Long-Document: A Comprehensive Framework and Dataset
  - Leveraging LLMs for KPIs Retrieval from Hybrid Long-Document: A Comprehensive Framework and Dataset
  - Towards Optimizing the Costs of LLM Usage

  - FINRA Publishes 2024 Annual Regulatory Oversight Report | JD Supra
  - FINRA Launches Machine-Readable Rulebook Initiative
  - SEC Approves FINRA Rule Requiring Registration of Algorithmic Trading Developers
  - Press Release Distribution and Management
  - News Releases & Statements | FINRA.org
  - Finra Calls AI ‘Emerging Risk’ in Annual Regulatory Report
  - Fidelity launches compliant marketing business powered by AI technology
  - SEC Approves Finra Proposals on Remote Work
  - FINRA Fines Webull $3 Million for Options Customer Approval Violations
  - December 2018 - Financial Regulation News

  - FinEAS: Financial Embedding Analysis of Sentiment
  - BioFinBERT: Finetuning Large Language Models (LLMs) to Analyze Sentiment of Press Releases and Financial Text Around Inflection Points of Biotech Stocks
  - Is Domain Adaptation Worth Your Investment? Comparing BERT and FinBERT on Financial Tasks
  - Term Expansion and FinBERT fine-tuning for Hypernym and Synonym Ranking of Financial Terms
  - Yseop at FinSim-3 Shared Task 2021: Specializing Financial Domain Learning with Phrase Representations
  - GitHub - keitazoumana/finBERT-Implementation: This repository is the illustration of finBERT for financial document classification
  - FinGPT: Instruction Tuning Benchmark for Open-Source Large Language Models in Financial Datasets
  - Sentiment-driven prediction of financial returns: a Bayesian-enhanced FinBERT approach
  - A Comparison of Pre-Trained Language Models for Multi-Class Text Classification in the Financial Domain
  - German FinBERT: A German Pre-trained Language Model
  - Text Mining of Stocktwits Data for Predicting Stock Prices
  - GOAT at the FinSim-2 task: Learning Word Representations of Financial Data with Customized Corpus

**[Step 4] Draft:** 

This comprehensive analysis examines different approaches for using Large Language Models (LLMs) in financial document summarization, comparing traditional techniques, finance-specific models, and hybrid systems.

Key Findings:

1. Performance of Finance-Specific Models:
- FinBERT and other finance-tuned models show improved but not dramatically better performance compared to general LLMs
- Domain adaptation through continued pre-training from base models outperforms training from scratch on financial data
- Recent evaluations show GPT-4-Turbo with retrieval still has significant limitations, incorrectly handling 81% of financial QA tasks

2. Hybrid Approaches:
- Combining LLMs with rule-based systems shows promise for handling domain-specific requirements
- The AFIE framework combining LLMs with traditional extractive techniques achieved 33-53% accuracy improvements
- Systems like Saifr demonstrate successful integration of NLP and compliance rules for financial content

3. Document Length Challenges:
- Most models struggle with long financial documents (>512 tokens)
- New datasets like DocFinQA and LongFin are helping evaluate and improve long-context understanding
- Specialized architectures like LongFin can handle up to 4K tokens while maintaining accuracy

4. Practical Considerations:
- Cost optimization is critical - studies show potential 40-90% cost reduction while maintaining quality
- Regulatory compliance requires careful integration of rules and human oversight
- Class imbalance and domain-specific terminology remain significant challenges

5. Implementation Best Practices:
- Use domain-specific fine-tuning rather than training from scratch
- Implement hybrid architectures combining LLMs with rule-based systems
- Consider document length limitations and cost implications
- Maintain human oversight for regulatory compliance
- Regular evaluation against financial domain benchmarks

Conclusion:
While pure LLM approaches show promise, hybrid systems combining LLMs with rule-based components currently offer the most practical path forward for financial document summarization. Key challenges around document length, cost optimization, and regulatory compliance require careful architectural consideration. Regular evaluation against domain-specific benchmarks remains essential as the technology evolves.

**Source:** [FinanceBench: A New Benchmark for Financial Question Answering](https://arxiv.org/abs/2311.11944)

**Source:** [DocFinQA: A Long-Context Financial Reasoning Dataset](https://arxiv.org/abs/2401.06915)

**Source:** [Towards Optimizing the Costs of LLM Usage](https://arxiv.org/abs/2402.01742)

**Source:** [LongFin: A Multimodal Document Understanding Model for Long Financial Domain Documents](http://arxiv.org/abs/2401.15050)

**Source:** [Is Domain Adaptation Worth Your Investment? Comparing BERT and FinBERT on Financial Tasks](https://www.aclanthology.org/2021.econlp-1.5.pdf)