In [1]:
import os
from dataclasses import dataclass, field
from pathlib import Path

from omegaconf import OmegaConf, ValidationError

from pdf_rag.react_agent_multi_pdfs import ReActAgentMultiPdfs
from dotenv import load_dotenv

# Populate the process environment from a local .env file (if one exists) so
# that the os.environ lookups for GEMINI_API_KEY / MISTRAL_API_KEY can succeed
# without the keys being written into the notebook or the YAML config.
load_dotenv()


@dataclass
class ReActAgentConfig:
    data_dir: Path | str
    api_key_gemini: str | None = None
    api_key_mistral: str | None = None
    num_workers: int = 16
    chunks_top_k: int = 5
    nodes_top_k: int = 10
    max_iterations: int = 20
    verbose: bool = True
    queries: list[str] = field(default_factory=list)

    def __post_init__(self):
        self.data_dir = Path(self.data_dir)
        self.root_dir = self.data_dir / "pdfs"
        self.pdfs_dir = self.data_dir / "pdfs"
        self.cache_dir = self.data_dir / "cache"
        self.storage_dir = self.data_dir / "storage_queries"

        self.api_key_gemini = self.api_key_gemini or os.environ.get("GEMINI_API_KEY")
        self.api_key_mistral = self.api_key_mistral or os.environ.get("MISTRAL_API_KEY")
        if not self.api_key_gemini:
            raise ValueError(
                "Gemini API Key is required. Provide api_key_gemini or set GEMINI_API_KEY environment variable."
            )
        if not self.api_key_mistral:
            raise ValueError(
                "Mistral API Key is required. Provide api_key_mistral or set MISTRAL_API_KEY environment variable."
            )


def load_and_validate_config(config_path: str) -> ReActAgentConfig:
    """Load a config file and turn it into a validated ``ReActAgentConfig``.

    The file content is merged onto the structured schema derived from
    ``ReActAgentConfig`` so OmegaConf can check the values against the declared
    field types; the merged mapping is then used to instantiate the dataclass.
    On success a confirmation and a summary of the config are printed, with the
    API key values masked so they do not end up in saved notebook output.

    Args:
        config_path: Location of the configuration file, passed to
            ``OmegaConf.load``.

    Returns:
        The populated ``ReActAgentConfig`` instance.

    Raises:
        ValidationError: If OmegaConf reports a validation problem. The
            original error is attached as ``__cause__``.
        Exception: For any other failure while loading or instantiating
            (missing file, missing API key, ...). The original error is
            attached as ``__cause__``.
    """
    try:
        file_config = OmegaConf.load(config_path)
        schema = OmegaConf.structured(ReActAgentConfig)
        merged = OmegaConf.merge(schema, file_config)
        react_agent_config = ReActAgentConfig(**merged)
    except ValidationError as e:
        # Chain explicitly so the underlying OmegaConf error stays inspectable.
        raise ValidationError(f"Validation error: {e}") from e
    except Exception as e:
        raise Exception(f"Error loading configuration: {e}") from e

    # Mask secrets before printing: cell output is stored inside the notebook.
    summary = str(react_agent_config)
    for secret in (react_agent_config.api_key_gemini, react_agent_config.api_key_mistral):
        if secret:
            summary = summary.replace(str(secret), "***")

    print("Configuration loaded and validated successfully:")
    print(summary)
    return react_agent_config

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the YAML-backed settings for this run.
config_path = "../configs/query_multi_pdfs_nicolas.yaml"
config = load_and_validate_config(config_path)

# Every constructor argument of the agent is read from the config attribute of
# the same name, so collect them by name instead of spelling each one out.
agent_settings = {
    name: getattr(config, name)
    for name in (
        "api_key_gemini",
        "api_key_mistral",
        "root_dir",
        "pdfs_dir",
        "cache_dir",
        "storage_dir",
        "num_workers",
        "chunks_top_k",
        "nodes_top_k",
        "max_iterations",
        "verbose",
    )
}
react_agent_multi_pdfs = ReActAgentMultiPdfs(**agent_settings)

# Run all configured queries, then print each question next to its answer.
responses = react_agent_multi_pdfs.process_queries(queries=config.queries)
for query, response in zip(config.queries, responses):
    print("-" * 30)
    print(f"Query: {query}")
    print(f"Response: {response}")

Configuration loaded and validated successfully:
ReActAgentConfig(data_dir=PosixPath('/home/nicolas/PycharmProjects/pdf-rag/data'), api_key_gemini='<REDACTED>', api_key_mistral='<REDACTED>', num_workers=16, chunks_top_k=5, nodes_top_k=10, max_iterations=20, verbose=True, queries=['What are the vulnerabilities introduced by relying on application programming interfaces (APIs) in Banking as a Service (BaaS)?', 'What mitigation opportunities are there to ensure strong security for BaaS platforms and API connectivity?', 'How can the industry best improve due diligence on BaaS providers in this landscape?', 'What are the common objectives of the Open Data ecosystem?', 'What are key strategic decisions to be made by ecosystem participants?', 'How can the public and private sectors collaborate to promote innovation, secure data sharing, and data privacy within the Open Data ecosystem?', 'What are the key characteristics that define the Leader

Building agents: 100%|██████████| 6/6 [00:00<00:00, 15.40it/s]


> Running step 4f3be120-619c-492b-ad8f-00338f87745f. Step input: What are the vulnerabilities introduced by relying on application programming interfaces (APIs) in Banking as a Service (BaaS)?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: tool_deloitte-tech-risk-sector-banking
Action Input: {'input': 'vulnerabilities of APIs in Banking as a Service (BaaS)'}
[0m> Running step 18bd750d-36c4-4dbc-9b26-0f67e7aa1e4e. Step input: vulnerabilities of APIs in Banking as a Service (BaaS)
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: vector_tool_deloitte-tech-risk-sector-banking
Action Input: {'input': 'API vulnerabilities in Banking as a Service (BaaS)'}
[0m[1;3;34mObservation: BaaS increasingly relies on application programming interfaces, introducing vulnerabilities that can pose risks for banks, such as customers' sensitiv