Experiment: In case a document is missing (e.g., the latest report), can the agent pull in the PDF from a data source?

In [None]:
from typing import Literal
from tavily import TavilyClient
from langchain_openai import ChatOpenAI
import requests
import os
from dotenv import load_dotenv
import fitz

# Load settings via python-dotenv before any tool runs; internet_search reads
# TAVILY_API_KEY from os.environ, so it is presumably expected to come from a
# local .env file -- confirm against the deployment setup.
load_dotenv()


# embeddings = OpenAIEmbeddings(
#     model="text-embedding-3-small",
# )

# collection_name = "transcripts"

# client = QdrantClient(url="http://localhost:6333")
# vector_store = QdrantVectorStore(
#     client=client,
#     collection_name=collection_name,
#     embedding=embeddings,
# )

In [None]:
def internet_search(query: str):
    """Search the internet to fetch real-time data"""
    # NOTE: the docstring above is kept verbatim; it is presumably used as the
    # tool description shown to the model when this function is registered as
    # an agent tool -- confirm before rewording.
    #
    # The API key is read from the environment on every call; a missing
    # TAVILY_API_KEY raises KeyError here.
    api_key = os.environ["TAVILY_API_KEY"]
    client = TavilyClient(api_key=api_key)

    # Finance-scoped search that also asks for each hit's raw page content.
    return client.search(query, topic="finance", include_raw_content=True)


def get_financial_data(topic: str, time_quarter: Literal["Q1", "Q2", "Q3", "Q4"]):
    """Search the knowledge base for information on a topic."""
    # Stub for the experiment: no knowledge base is queried. The function
    # always reports a miss for the requested topic and quarter.
    not_found_template = "No financial data found for {} with filter = {} quarter."
    return not_found_template.format(topic, time_quarter)


def save_file(file_url: str) -> str:
    """Save a PDF file to the knowledge base using it's URL"""
    # NOTE: the docstring above is kept verbatim; it is presumably used as the
    # tool description shown to the model -- confirm before rewording.
    #
    # Downloads the PDF at `file_url`, writes it to "temp.pdf" in the current
    # working directory, extracts the text of every page except the first and
    # returns a confirmation string containing a 30-character text preview.
    #
    # Raises:
    #   Exception: if the server answers with any status other than 200.
    #   requests.exceptions.RequestException: on connection errors or when the
    #       request exceeds the timeout below.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Accept": "application/pdf",
        # "Referer": "https://www.bseindia.com/",
    }

    # A timeout keeps an unresponsive host from blocking the agent forever.
    response = requests.get(url=file_url, headers=headers, timeout=30)

    if response.status_code != 200:
        raise Exception(f"Failed to retrieve PDF. Status code: {response.status_code}")

    with open("temp.pdf", "wb") as f:
        f.write(response.content)

    # The context manager closes the document (and its file handle) even if
    # text extraction fails.
    with fitz.open("temp.pdf") as doc:
        # Page index 0 is skipped on purpose; a single-page PDF therefore
        # yields an empty text.
        text_content = "".join(
            doc.load_page(page_num).get_text()
            for page_num in range(1, len(doc))
        )

    return f"Document with text preview {text_content[:30]}.... successfully saved."

In [None]:
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import InMemorySaver

# Checkpoint store passed to the agent; the invoke call's thread_id presumably
# selects the conversation stored here -- confirm against LangGraph docs.
checkpointer = InMemorySaver()

# ReAct agent wired to the three tool functions defined in this notebook.
# temperature=0.0 is set on the model; debug=True is passed through to
# create_react_agent.
agent = create_react_agent(
    model=ChatOpenAI(temperature=0.0, model="gpt-4o-mini"),
    tools=[internet_search, get_financial_data, save_file],
    prompt="""You are an expert financial report generator. If you are asked about a company's financial performance, you will first search the internal knowledgebase for the most accurate data. Incase the knowledge base does not contain a piece of information, you are allowed to use the internet tool to fetch relevant files and save them for future use.
    Think step by step.""",
    checkpointer=checkpointer,
    debug=True,
)

In [None]:
# Run the agent on a sample question. get_financial_data always reports that
# no data was found, so this exercises the prompt's fallback path of searching
# the internet and saving a file.
# The thread_id presumably identifies the conversation in the checkpointer --
# confirm against LangGraph docs.
agent.invoke(
    {
        "messages": [
            {
                "role": "user",
                "content": "What is the financial performance of TCS in Q1 2023?",
            }
        ]
    },
    config={"configurable": {"thread_id": "1"}},
)