In [1]:
from IPython.display import display, Markdown

import json

from lionagi import iModel, Branch, types, BaseModel
from lionagi.service.providers.exa_.types import ExaSearchRequest
from lionagi.utils import alcall


# Structured-output schema for step 3 of `research`: the LLM is asked to fill
# this model (passed as `response_format`) with the searches to run.
class SearchRequests(BaseModel):
    # One entry per search to issue; each is later dumped to keyword arguments
    # for `exa.create_api_calling`. Defaults to no searches.
    search_requests: list[ExaSearchRequest] = []


# Structured-output schema for step 2 of `research` (passed as
# `response_format` to `branch.operate`).
class Analysis(BaseModel):
    # Free-text analysis of the interpreted query.
    analysis: str


# A single reference cited by a `ResearchDraft`.
class Source(BaseModel):
    # Human-readable title of the referenced document.
    title: str
    # Link to the referenced document.
    url: str


# Structured-output schema for the final report produced by `research`
# (passed as `response_format` in the drafting step).
class ResearchDraft(BaseModel):
    # Title of the report.
    title: str
    # Body text of the report.
    content: str
    # References backing the report (note: singular field name, list value).
    source: list[Source]


# Module-level iModel for the "exa" provider's "search" endpoint. `research`
# uses it both to build the API calls (`exa.create_api_calling`) and to run
# them (`exa.invoke`).
exa = iModel(
    provider="exa",
    endpoint="search",
    queue_capacity=5,  # presumably the rate-limit queue size — TODO confirm against iModel docs
    capacity_refresh_time=1,  # presumably how often that capacity refreshes — TODO confirm units
    invoke_with_endpoint=False,
)


async def research(
    branch: Branch,
    query: str,
    domain: str | None = None,
    style: str | None = None,
    sample_writing: str | None = None,
    interpret_kwargs: dict | None = None,
    *,
    verbose: bool = False,
) -> types.OperableModel:
    """
    Run a four-step research pipeline, optionally displaying each step.

      1) Interpret (rewrite/clarify) the user query.
      2) Generate an analysis from the LLM.
      3) Produce search requests and run them against the EXA provider
         (requests are marked ``is_cached=True`` unless they say otherwise).
      4) Draft a final report from the raw search results.

    Any exception raised along the way is caught: it is printed, stored in
    an ``error`` field, and the partially-filled result is returned. Callers
    should therefore check for ``error`` before reading ``research_draft``.

    Parameters
    ----------
    branch : Branch
        The branch instance handling the conversation / context.
    query : str
        The user's research query.
    domain : Optional[str]
        Domain hint (e.g. "finance", "marketing").
    style : Optional[str]
        Style hint (e.g. "concise", "technical").
    sample_writing : Optional[str]
        A sample snippet that might help interpret the style or structure.
    interpret_kwargs : Optional[dict]
        Additional parameters for the `branch.interpret()` call.
    verbose : bool
        If True, displays intermediate results step-by-step.

    Returns
    -------
    OperableModel
        Fields are added as each step completes, in this order:
        ``analysis``, ``search_requests``, ``search_results``,
        ``research_draft``. On failure an ``error`` field (str) is added and
        the fields of the steps that did not run are absent.
    """

    out = types.OperableModel()
    try:
        # -- Step 1: Interpret the query for better clarity --
        interpreted = await branch.interpret(
            text=query,
            guidance="Rewrite the user input to ensure we fully understand and clarify the user's objective.",
            domain=domain,
            style=style,
            sample=sample_writing,
            **(interpret_kwargs or {}),
        )
        if verbose:
            display(Markdown(f"**[Step 1] Interpreted query:** {interpreted}"))

        # -- Step 2: Generate analysis from the LLM --
        analysis = await branch.operate(
            instruction=interpreted,
            guidance=(
                "Perform a thorough analysis focusing on domain knowledge, "
                "potential angles, and constraints. Be concise but complete."
            ),
            response_format=Analysis,
            reason=True,
        )
        out.add_field("analysis", analysis, annotation=Analysis)
        if verbose:
            display(
                Markdown(f"**[Step 2] Analysis result:** {analysis.analysis}")
            )

        # -- Step 3: Produce search requests based on the analysis --
        # Each fragment ends with a space so the implicitly concatenated
        # sentences do not run together in the prompt.
        search_requests: SearchRequests = await branch.operate(
            instruction=(
                "Based on the analysis, produce a list of relevant search requests "
                "for the EXA provider. Focus on the key points from the analysis. "
                "make sure you get sufficient information from the search results. "
                "exclude sites like reddit, or other low quality sources."
            ),
            guidance=(
                "Generate specific queries that capture the key aspects from the analysis. "
                "Provide enough detail for each request."
            ),
            response_format=SearchRequests,
            reason=True,
        )
        out.add_field(
            "search_requests", search_requests, annotation=SearchRequests
        )
        if verbose:
            display(Markdown("**[Step 3] Search requests:**"))
            for req in search_requests.search_requests:
                display(
                    Markdown(req.model_dump_json(exclude_none=True, indent=2))
                )

        # Prepare API calls
        api_calls = []
        for req in search_requests.search_requests:
            params = req.model_dump(exclude_none=True)
            # Cache search results unless the request explicitly opts out.
            params.setdefault("is_cached", True)
            api_calls.append(exa.create_api_calling(**params))

        # Invoke EXA searches asynchronously
        search_results = await alcall(
            api_calls, exa.invoke, retry_default=None, dropna=True
        )
        # Raw provider payloads; built once and reused for both the output
        # field and the drafting context below.
        responses = [res.response for res in search_results]
        out.add_field("search_results", responses, annotation=list[dict])
        if verbose:
            display(Markdown("**[Step 3] Search results:**"))
            for payload in responses:
                # Display is best-effort: a result without a title (or a
                # payload without "results") must not abort the pipeline.
                # Assumes each payload is a dict — TODO confirm.
                display(
                    Markdown(
                        "\n".join(
                            f"  - {item.get('title') or '(untitled)'}"
                            for item in payload.get("results", [])
                        )
                    )
                )

        # -- Step 4: Draft a final output from the raw search results --
        draft = await branch.operate(
            instruction=(
                "Prepare a well-formatted, factual research report basing on the research findings. "
                "Incorporate key insights from the context."
            ),
            guidance=(
                "Synthesize a final summary using all insights gleaned from the search results. "
                "Ensure clarity and accuracy, and follow any requested style. "
                "answer the user's question, and provide additional context."
            ),
            context={"search_results": json.dumps(responses)},
            response_format=ResearchDraft,
        )
        out.add_field("research_draft", draft, annotation=ResearchDraft)
        if verbose:
            display(Markdown(f"**[Step 4] Draft:** \n\n{draft.content}"))

    except Exception as e:
        # Deliberate best-effort boundary: keep whatever steps succeeded and
        # surface the failure on the result instead of raising.
        out.add_field("error", str(e), annotation=str)
        print("Error occurred during research:", e)

    return out

In [2]:
# System prompt for the researcher Branch (passed as `system=` when the
# `researcher` branch is constructed). The text is sent to the model verbatim.
researcher_prompt = """SYSTEM PROMPT (Researcher):
You are a specialized research assistant, trained to gather information from various sources accurately and concisely. Your job involves:
 • Interpreting user questions and clarifying objectives,
 • Proposing relevant angles or methods of inquiry,
 • Generating precise search queries to explore any topic,
 • Summarizing findings accurately while preserving key details.

When performing your tasks:
 • Confirm context and constraints (like domain or style requirements).
 • Provide well-structured, consistent, and thorough analyses.
 • Use suitable search queries to gather relevant info.
 • Summarize or compress results in a way that remains factual.
 • Maintain an objective, knowledgeable, and professional tone.

Overall, your responsibility is to produce high-quality research findings and drafts that help the user solve problems or gather insights effectively."""

In [3]:
# Example usage of `research` function

from lionagi.session.branch import Branch

def _openrouter_model(model_name: str) -> iModel:
    """Build an OpenRouter-backed iModel with this notebook's shared settings."""
    return iModel(
        provider="openrouter",
        model=model_name,
        max_tokens=8000,  # required for anthropic models
        invoke_with_endpoint=False,
    )


# Two models with identical settings apart from the model name.
haiku = _openrouter_model("anthropic/claude-3.5-haiku")
sonnet = _openrouter_model("anthropic/claude-3.5-sonnet")

# The branch chats with sonnet and uses haiku as its parse model.
researcher = Branch(
    chat_model=sonnet,
    parse_model=haiku,
    system=researcher_prompt,
)

# Example query requesting an analysis of LLM-based summarization in finance.
# Passed to `research` as `query`.
query_text = (
    "I want to compare different LLM-based summarization approaches "
    "for financial documents. Focus on accuracy, cost, and domain adaptability. "
    "Also highlight practical use-cases or references."
)

# Optional hints, passed to `research` as `domain`, `style` and
# `sample_writing` respectively.
domain_hint = "finance"
style_hint = "extensive"
sample_snippet = (
    "Sample text: In the finance domain, we often handle massive amounts of data. "
    "We want a method that can summarize quickly and accurately."
)

In [4]:
# Run the research pipeline on the example query. Top-level `await` is used
# because this runs as a notebook cell. `research` catches its own exceptions,
# so `result` may carry an `error` field instead of a `research_draft`.
result = await research(
    branch=researcher,
    query=query_text,
    domain=domain_hint,
    style=style_hint,
    sample_writing=sample_snippet,
    interpret_kwargs={"temperature": 0.3},  # example; forwarded to `branch.interpret()`
    verbose=True,  # display each intermediate step
)

**[Step 1] Interpreted query:** Here's the clarified and structured prompt:

Please provide a comprehensive analysis of Large Language Model (LLM) approaches for financial document summarization, addressing the following key aspects:

1. Comparison of different LLM summarization methods:
   - Zero-shot vs. few-shot approaches
   - Fine-tuned models vs. general-purpose models
   - Extractive vs. abstractive summarization techniques

2. Evaluation criteria:
   - Accuracy metrics (ROUGE scores, human evaluation)
   - Cost considerations (token usage, computational resources)
   - Domain adaptation capabilities for financial texts
   - Handling of specialized financial terminology and numerical data

3. Practical implementation considerations:
   - Real-world use cases in financial institutions
   - Regulatory compliance and data security requirements
   - Integration with existing document management systems
   - Scalability for large document volumes

4. Specific financial document types to consider:
   - Annual reports (10-K, 10-Q)
   - Financial news articles
   - Research reports
   - Regulatory filings
   - Earnings call transcripts

Please include relevant academic studies, industry implementations, or benchmark results where available.

**[Step 2] Analysis result:** Domain Knowledge Analysis:

1. LLM Summarization Methods
- Zero-shot vs Few-shot:
  • Zero-shot better for standardized documents (10-K reports)
  • Few-shot excels with domain-specific terminology
  • Few-shot requires carefully curated examples

- Model Selection:
  • Fine-tuned models show superior performance on financial texts
  • Domain-adapted models better handle numerical data
  • General models struggle with financial jargon

- Summarization Techniques:
  • Extractive better preserves numerical accuracy
  • Abstractive provides more coherent narratives
  • Hybrid approaches show promise for financial documents

2. Key Constraints:

Technical:
- Token limits impact long document processing
- GPU memory requirements for large models
- Real-time processing needs

Regulatory:
- Data privacy (GDPR, CCPA)
- Audit trail requirements
- Model explainability needs

Domain-specific:
- Numerical accuracy preservation
- Financial terminology handling
- Temporal context maintenance

3. Implementation Considerations:

Practical:
- Batch processing for large document volumes
- API integration capabilities
- Error handling protocols

Scalability:
- Distributed processing architecture
- Load balancing requirements
- Storage optimization needs

4. Document-Specific Approaches:

Annual Reports:
- Section-wise processing
- Key metrics extraction
- Year-over-year comparison focus

Earnings Calls:
- Speaker diarization handling
- Q&A section summarization
- Forward-looking statement identification

Research Reports:
- Citation preservation
- Methodology extraction
- Findings prioritization

5. Evaluation Framework:

Accuracy Metrics:
- ROUGE scores for content alignment
- Domain-specific metrics for financial accuracy
- Human evaluation for practical usability

Performance Metrics:
- Processing time per document
- Resource utilization
- Cost per summary

6. Recommendations:

- Implement hybrid approach combining extractive and abstractive methods
- Use fine-tuned models for specialized documents
- Establish robust validation pipeline
- Include human-in-the-loop for critical documents
- Regular model retraining with domain updates

**[Step 3] Search requests:**

{
  "query": "comparison zero shot few shot fine-tuning LLM financial document summarization evaluation metrics ROUGE scores",
  "category": "research paper",
  "type": "neural",
  "useAutoprompt": false,
  "numResults": 15,
  "excludeDomains": [
    "reddit.com",
    "medium.com",
    "quora.com"
  ],
  "startPublishedDate": "2022-01-01T00:00:00.000Z",
  "contents": {
    "highlights": {
      "highlightsPerUrl": 1,
      "numSentences": 3
    },
    "summary": {},
    "livecrawl": "never",
    "livecrawlTimeout": 10000
  }
}

{
  "query": "financial document LLM summarization case studies banks investment firms implementation challenges",
  "category": "research paper",
  "type": "neural",
  "useAutoprompt": false,
  "numResults": 10,
  "excludeDomains": [
    "reddit.com",
    "medium.com",
    "quora.com"
  ],
  "startPublishedDate": "2022-01-01T00:00:00.000Z",
  "includeText": [
    "case study",
    "implementation"
  ],
  "contents": {
    "highlights": {
      "highlightsPerUrl": 1,
      "numSentences": 3
    },
    "summary": {},
    "livecrawl": "never",
    "livecrawlTimeout": 10000
  }
}

{
  "query": "regulatory compliance data security requirements LLM financial document processing GDPR CCPA",
  "category": "research paper",
  "type": "neural",
  "useAutoprompt": false,
  "numResults": 10,
  "excludeDomains": [
    "reddit.com",
    "medium.com",
    "quora.com"
  ],
  "startPublishedDate": "2022-01-01T00:00:00.000Z",
  "contents": {
    "highlights": {
      "highlightsPerUrl": 1,
      "numSentences": 3
    },
    "summary": {},
    "livecrawl": "never",
    "livecrawlTimeout": 10000
  }
}

{
  "query": "extractive vs abstractive summarization financial reports numerical data preservation techniques",
  "category": "research paper",
  "type": "neural",
  "useAutoprompt": false,
  "numResults": 10,
  "excludeDomains": [
    "reddit.com",
    "medium.com",
    "quora.com"
  ],
  "startPublishedDate": "2022-01-01T00:00:00.000Z",
  "contents": {
    "highlights": {
      "highlightsPerUrl": 1,
      "numSentences": 3
    },
    "summary": {},
    "livecrawl": "never",
    "livecrawlTimeout": 10000
  }
}

{
  "query": "technical architecture scalability large language models financial document processing distributed systems",
  "category": "research paper",
  "type": "neural",
  "useAutoprompt": false,
  "numResults": 10,
  "excludeDomains": [
    "reddit.com",
    "medium.com",
    "quora.com"
  ],
  "startPublishedDate": "2022-01-01T00:00:00.000Z",
  "contents": {
    "highlights": {
      "highlightsPerUrl": 1,
      "numSentences": 3
    },
    "summary": {},
    "livecrawl": "never",
    "livecrawlTimeout": 10000
  }
}

ERROR:root:API call to https://api.exa.ai/search failed: catching classes that do not inherit from BaseException is not allowed


**[Step 3] Search results:**

  - Combining State-of-the-Art Models with Maximal Marginal Relevance for Few-Shot and Zero-Shot Multi-Document Summarization
  - A Comparative Analysis of Fine-Tuned LLMs and Few-Shot Learning of LLMs for Financial Sentiment Analysis
  - Benchmarking Large Language Models for News Summarization
  - Open-source Large Language Models are Strong Zero-shot Query Likelihood Models for Document Ranking
  - Less is More for Long Document Summary Evaluation by LLMs
  - Low-Resource Court Judgment Summarization for Common Law Systems
  - Revisiting the Gold Standard: Grounding Summarization Evaluation with Robust Human Evaluation
  - Enhancing Large Language Model Performance To Answer Questions and Extract Information More Accurately
  - UniSumEval: Towards Unified, Fine-Grained, Multi-Dimensional Summarization Evaluation for LLMs
  - DocAsRef: A Pilot Empirical Study on Repurposing Reference-Based Summary Quality Metrics Reference-Freely
  - Multi-Document Financial Question Answering using LLMs
  - Improving Zero-Shot Text Matching for Financial Auditing with Large Language Models
  - Summary of a Haystack: A Challenge to Long-Context LLMs and RAG Systems
  - Financial Knowledge Large Language Model
  - Towards Optimizing the Costs of LLM Usage

  - Documentation
  - Building data management capabilities to address data protection regulations: Learnings from EU-GDPR
  - Best Practices for CCPA & GDPR Compliance | Micro Focus
  - AI-Enabled Automation for Completeness Checking of Privacy Policies
  - Data protection rules applicable to Financial Intelligence Units: still no clarity in sight
  - Support for Enhanced GDPR Accountability with the Common Semantic Model for ROPA (CSM-ROPA)
  - Data Security on the Ground: Investigating Technical and Legal Requirements under the GDPR
  - Know Your Customer: Balancing innovation and regulation for financial inclusion | Data & Policy | Cambridge Core
  - GDPR and unstructured data: is anonymization possible?
  - Data Protection Act 2018

  - Numerical Reasoning for Financial Reports
  - Long Text and Multi-Table Summarization: Dataset and Method
  - L3iTC at the FinLLM Challenge Task: Quantization for Financial Text Classification & Summarization
  - Information Extraction through AI techniques: The KIDs use case at CONSOB
  - Towards reducing hallucination in extracting information from financial reports using Large Language Models
  - NumHTML: Numeric-Oriented Hierarchical Transformer Model for Multi-task Financial Forecasting
  - Leveraging LLMs for KPIs Retrieval from Hybrid Long-Document: A Comprehensive Framework and Dataset
  - Leveraging LLMs for KPIs Retrieval from Hybrid Long-Document: A Comprehensive Framework and Dataset
  - Financial Report Chunking for Effective Retrieval Augmented Generation
  - REFinD: Relation Extraction Financial Dataset

  - DocFinQA: A Long-Context Financial Reasoning Dataset
  - Distributed Inference and Fine-tuning of Large Language Models Over The Internet
  - Enabling Cross-Language Data Integration and Scalable Analytics in Decentralized Finance
  - Efficient Parallelization Layouts for Large-Scale Distributed Model Training
  - Delivering Document Conversion as a Cloud Service with High Throughput and Responsiveness
  - LongFin: A Multimodal Document Understanding Model for Long Financial Domain Documents
  - Shai: A large language model for asset management
  - DistServe: Disaggregating Prefill and Decoding for Goodput-optimized Large Language Model Serving
  - Chiplet Cloud: Building AI Supercomputers for Serving Large Generative Language Models
  - Paper page - MegaScale: Scaling Large Language Model Training to More Than 10,000
  GPUs

**[Step 4] Draft:** 

This research report synthesizes current approaches and best practices for using Large Language Models (LLMs) in financial document summarization.

Key Findings:

1. Summarization Methods
- Zero-shot vs Few-shot: Recent studies show few-shot learning performs better with domain-specific financial terminology, while zero-shot works well for standardized documents like 10-K reports. One study found increasing the number of few-shot examples did not significantly improve performance.

- Fine-tuning vs General Models: Research demonstrates fine-tuned models achieve superior performance on financial texts, particularly for preserving numerical accuracy. Fine-tuned smaller models (250M-3B parameters) can match larger models' performance with domain-specific training.

- Extractive vs Abstractive: Extractive methods better preserve numerical data accuracy, while abstractive provides more coherent narratives. Hybrid approaches combining both show promising results.

2. Accuracy and Evaluation
- Numerical Data Preservation: Studies highlight the importance of preserving numerical accuracy in financial summaries. New evaluation frameworks like FINE dataset specifically test numerical extraction accuracy.
- ROUGE Scores: While commonly used, research suggests ROUGE scores alone are insufficient for financial documents. Multiple studies recommend complementary metrics focused on numerical accuracy.
- Human Evaluation: Recent work emphasizes the need for domain expert evaluation, particularly for assessing factual accuracy in financial contexts.

3. Technical Implementation
- Scalability: Distributed processing architectures show promise for handling large document volumes, with some implementations processing over 1M pages per hour.
- Data Security: Research emphasizes implementing robust security measures meeting GDPR/CCPA requirements, particularly for financial data processing.
- System Integration: Studies recommend hybrid approaches combining LLM capabilities with existing document management systems.

Recommendations:
1. Implement hybrid summarization approaches combining extractive methods for numerical data with abstractive techniques for narrative sections
2. Use domain-adapted fine-tuned models for specialized financial documents
3. Establish multi-metric evaluation frameworks including numerical accuracy metrics
4. Deploy distributed processing architecture for handling large document volumes
5. Ensure compliance with data protection regulations through appropriate security measures

Limitations and Future Work:
- Need for improved methods to handle extremely long financial documents
- Further research required on maintaining temporal context in multi-period financial reports
- Development of standardized evaluation metrics for financial document summarization

In [5]:
# Render every source cited by the draft as a Markdown link.
cited_sources = result.research_draft.source
for src in cited_sources:
    display(Markdown(f"**Source:** [{src.title}]({src.url})"))

**Source:** [A Comparative Analysis of Fine-Tuned LLMs and Few-Shot Learning of LLMs for Financial Sentiment Analysis](https://arxiv.org/abs/2312.08725)

**Source:** [Leveraging LLMs for KPIs Retrieval from Hybrid Long-Document: A Comprehensive Framework and Dataset](https://arxiv.org/abs/2305.16344)

**Source:** [Numerical Reasoning for Financial Reports](http://arxiv.org/abs/2312.14870)

**Source:** [Data Protection Act 2018](https://www.legislation.gov.uk/ukpga/2018/12/contents)

**Source:** [DocFinQA: A Long-Context Financial Reasoning Dataset](https://arxiv.org/abs/2401.06915)