In [1]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langgraph.prebuilt import create_react_agent
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.messages import HumanMessage
from langgraph.prebuilt import create_react_agent
from typing import Dict
from config import Settings
import os

from pydantic import BaseModel, Field
import operator
from typing import Annotated, List, Tuple
from typing_extensions import TypedDict

In [2]:
# Root directory under which one FAISS index per company is stored.
VECTOR_DIR = 'vectorize'

# The OpenAI key is read from the project's Settings object instead of being
# written into the notebook.
api_key = Settings.api_key

# Embedding model shared by the indexing cell and by the query-time retriever.
embeddings = OpenAIEmbeddings(api_key=api_key, model="text-embedding-ada-002")

In [None]:
# PDF filings to index. Each file name is expected to start with "<company>_".
file_paths = ["data/nvidia_10k_21.pdf", "data/lyft_10k_21.pdf"]

for file_path in file_paths:
    loader = PyPDFLoader(file_path)
    document = loader.load()

    # Take the company from the file name itself rather than from a fixed
    # position in the path, so the PDFs may live at any directory depth.
    # Lowercased because contextualQA documents company names as lowercase.
    company_name = os.path.basename(file_path).split("_")[0].lower()
    print("Company: ", company_name)

    # One index directory per company. exist_ok=True replaces the separate
    # exists() check and keeps the cell safe to re-run.
    save_dir = os.path.join(VECTOR_DIR, company_name)
    os.makedirs(save_dir, exist_ok=True)

    # Embed the loaded documents and persist the index for later lookups.
    vector_store = FAISS.from_documents(document, embeddings)
    vector_store.save_local(save_dir)

In [4]:
# Chat model used by the question-answering chain built in contextualQA.
llm = ChatOpenAI(
    api_key=api_key,
    model="gpt-4o",
)

# System prompt for the QA chain. "{context}" is the placeholder that receives
# the retrieved documents. The wording is sent to the model verbatim, so the
# typos in the fallback instruction were corrected here.
SYSTEM_TEMPLATE = """
    Answer the user's questions based on the below context.
    If the context doesn't contain any relevant information to the question, don't make something up
    and just reply that the information cannot be found:
    <context>
    {context}
    </context>
    """

In [3]:
def parse_retriever_input(params: Dict):
    """
    Extracts the text of the most recent message from the chain input.

    Parameters:
    - params: mapping with a "messages" sequence whose items expose `.content`

    Returns:
    - The `.content` of the last item in params["messages"].
    """
    latest_message = params["messages"][-1]
    return latest_message.content

def contextualQA(earnings_question: str, company_name: str) -> str:
    """
    Answers a financial question using the documents indexed for one company.

    This tool searches the FAISS index stored under VECTOR_DIR/<company_name>
    (built in this notebook from the companies' 10-K PDF filings) and has the
    LLM answer from the retrieved passages. Typical questions ask for financial
    figures such as net income, revenue, EBITDA, etc.

    Parameters:
    - earnings_question: The question asked by the user
    - company_name: The company whose index should be searched. Company names
      are lowercase; the value is stripped and lowercased before the lookup.

    Returns:
    - A string with the LLM's answer generated from the retrieved context.
    """
    # Callers may pass "Nvidia" or " lyft "; normalize so the lookup matches the
    # lowercase company names this function has always required.
    index_dir = VECTOR_DIR + "/" + company_name.strip().lower()

    # NOTE(security): allow_dangerous_deserialization=True permits load_local to
    # unpickle the stored index data. Only point this at index directories
    # produced by this notebook, never at files from an untrusted source.
    vector_store = FAISS.load_local(
        index_dir,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    retriever = vector_store.as_retriever()

    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", SYSTEM_TEMPLATE),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    qa_chain = create_stuff_documents_chain(llm, qa_prompt)

    # First assign adds "context": the documents retrieved for the latest message.
    # Second assign adds "answer": the QA chain run over the messages plus context.
    retrieval_chain = RunnablePassthrough.assign(
        context=parse_retriever_input | retriever
    ).assign(answer=qa_chain)

    response = retrieval_chain.invoke(
        {
            "messages": [HumanMessage(content=earnings_question)]
        }
    )
    return response["answer"]


In [None]:
# Typed dictionary with four keys; nothing in the visible cells constructs or
# consumes it yet.
# NOTE(review): the field meanings below are inferred from their names only —
# confirm against the graph code that uses this type.
class PlanExecute(TypedDict):
    input: str  # presumably the original user request
    plan: List[str]  # presumably the steps still to be carried out
    # operator.add is attached as Annotated metadata; presumably a LangGraph
    # reducer so that new step tuples are appended instead of replacing the
    # list — TODO confirm.
    past_steps: Annotated[List[Tuple], operator.add]
    response: str  # presumably the final answer returned to the user



# Pydantic model with a single field, `steps`, holding a list of step strings.
# NOTE(review): pydantic typically surfaces the class docstring and the Field
# description in the generated JSON schema, so both are deliberately left
# unchanged here — confirm before rewording either of them.
class Plan(BaseModel):
    """Plan to follow in future"""

    steps: List[str] = Field(
        description="different steps to follow, should be in sorted order"
    )