In [7]:
from dotenv import load_dotenv
load_dotenv()
! pip install pypdf

Collecting pypdf
  Downloading pypdf-5.5.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.5.0-py3-none-any.whl (303 kB)
Installing collected packages: pypdf
Successfully installed pypdf-5.5.0


In [22]:
from langchain_openai import ChatOpenAI

# local_llm = "llama3.2:3b-instruct-fp16"
# llm = ChatOllama(model=local_llm, temperature=0)
# llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")

from typing import Optional

from pydantic import BaseModel, Field


# Pydantic
class RetriverScore(BaseModel):
    """Result of PDF reader"""
    score: str = Field(description="The score of the retriever")

llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2
    # api_key="...",  # if you prefer to pass api key in directly instaed of using env vars
    # base_url="...",
    # organization="...",
    # other params...
).with_structured_output(RetriverScore)

In [23]:
from langchain_openai import OpenAIEmbeddings

from langchain_community.vectorstores import Chroma
from langchain_core.vectorstores import InMemoryVectorStore

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Set embeddings
embd = OpenAIEmbeddings()

def read_pdfs_and_split(): 
  filename = "/Users/james/projects/learnings/AI_Learnings/langchain/langgraph-test/data_sources/2025q1-alphabet-earnings-release.pdf"
  loader = PyPDFLoader(file_path=filename)
  docs = []
  docs = loader.load()

  text_splitter = RecursiveCharacterTextSplitter(
      chunk_size=2000,
      chunk_overlap=30,
      add_start_index=True)

  all_splits = text_splitter.split_documents(docs)
  return all_splits

all_splits = read_pdfs_and_split()

# Add to vectorstore
"""
vectorstore = Chroma.from_documents(
  documents=all_splits,
  collection_name="rag-chroma",
  embedding=embd,
)
"""
vectorstore = InMemoryVectorStore.from_documents(all_splits, embd)

retriever = vectorstore.as_retriever()

In [34]:
### Router
import json
from langchain_core.messages import HumanMessage, SystemMessage

# Prompt
router_instructions = """You are an expert at routing a user question to a vectorstore or web search.

The vectorstore contains documents related to earning reports for different companies.

Use the vectorstore for questions on these topics. For all else, and especially for current events, use web-search.

Return JSON with single key, datasource, that is 'websearch' or 'vectorstore' depending on the question."""

# Test router
test_web_search = llm.invoke(
    [SystemMessage(content=router_instructions)]
    + [
        HumanMessage(
            content="What's google revenu in Q1?"
        )
    ]
)
test_web_search_2 = llm.invoke(
    [SystemMessage(content=router_instructions)]
    + [HumanMessage(content="Where is google headerquater")]
)
test_vector_store = llm.invoke(
    [SystemMessage(content=router_instructions)]
    + [HumanMessage(content="Give me Google First Quarter 2025 Results")]
)
print(
    print(test_web_search.content),
    print(test_web_search_2.content),
    print(test_vector_store.content),
)

AttributeError: 'RetriverScore' object has no attribute 'content'

In [30]:
### Retrieval Grader

# Doc grader instructions
doc_grader_instructions = """You are a grader assessing relevance of a retrieved document to a user question.

If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant."""

# Grader prompt
doc_grader_prompt = """Here is the retrieved document: \n\n {document} \n\n Here is the user question: \n\n {question}. 

This carefully and objectively assess whether the document contains at least some information that is relevant to the question.

Return RetrieverScore object, that score is 'yes' or 'no' to indicate whether the document contains at least some information that is relevant to the question."""

# Test
question = "Google"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
doc_grader_prompt_formatted = doc_grader_prompt.format(
    document=doc_txt, question=question
)
result = llm.invoke(
    [SystemMessage(content=doc_grader_instructions)]
    + [HumanMessage(content=doc_grader_prompt_formatted)]
)
print(result)

score='yes'


In [33]:
import logging
from langgraph.graph import StateGraph
from IPython.display import Image, display


from langchain.schema import Document
from langgraph.graph import END

import operator
from typing_extensions import TypedDict
from typing import List, Annotated


class GraphState(TypedDict):
    """
    Graph state is a dictionary that contains information we want to propagate to, and modify in, each graph node.
    """

    question: str  # User question
    generation: str  # LLM generation
    agent: str  # Binary decision to run agent
    max_retries: int  # Max number of retries for answer generation
    answers: int  # Number of answers generated
    loop_step: Annotated[int, operator.add]
    documents: List[str]  # List of retrieved documents

def getLLM(outputSchema): 
    """
    Get LLM instance based on the output schema
    """
    from langchain_openai import ChatOpenAI

# local_llm = "llama3.2:3b-instruct-fp16"
# llm = ChatOllama(model=local_llm, temperature=0)
# llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")

from typing import Optional
from pydantic import BaseModel
from langchain_openai import ChatOpenAI

from langchain_core.messages import HumanMessage, SystemMessage

from .response_models import RetriverScore, RouteResponse, LLMResponse, DocGraderResponse

from .retriever_rag import retrieve_docs


# Setup module logger
logger = logging.getLogger(__name__)

# Prompt
router_instructions = """
You are a professional equity research analyst. Analyze the stock by a given stock name or code based on its latest quarterly earnings, valuation metrics (P/E, P/S, EV/EBITDA), competitive positioning, and current macroeconomic trends. Include the following in your report:

1. Company Overview – brief business summary and key revenue streams

2. Recent Financial Performance – revenue, EPS, margins (QoQ and YoY)

3. Valuation Comparison 

4. Technical Analysis – key support/resistance levels, moving averages

5. Catalysts & Risks – what might drive the stock price up or down in the next 6–12 months

6. Buy/Hold/Sell Recommendation – with a price target and rationale

If vectorstore contains documents related to the question, use them to answer the question.

If the vectorstore does not contain documents related to the question, use web search to answer the question.

Return RouteResponse response include one key, datasource, that is 'agent' or 'vectorstore' depending on the question."""




def getLLM(outputModel: Optional[BaseModel] = None):
    """
    Get LLM instance based on the output schema
    """
    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2
        # api_key="...",  # if you prefer to pass api key in directly instaed of using env vars
        # base_url="...",
        # organization="...",
        # other params...
    ).with_structured_output(outputModel)
    return llm

### Nodes

def generate(state):
    """
    Generate answer using RAG on retrieved documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
    loop_step = state.get("loop_step", 0)

    # RAG generation
#    docs_txt = format_docs(documents)
#    rag_prompt_formatted = rag_prompt.format(context=docs_txt, question=question)
#    generation = llm.invoke([HumanMessage(content=rag_prompt_formatted)])
    generation = "dummy generation"
    return {"generation": generation, "loop_step": loop_step + 1}


def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question
    If any document is not relevant, we will set a flag to run web search

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Filtered out irrelevant documents and updated web_search state
    """

    # Doc grader instructions
    doc_grader_instructions = """You are a grader assessing relevance of a retrieved document to a user question.

    If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant."""


    # Grader prompt
    doc_grader_prompt = """
    Here is the retrieved document: \n\n {document} \n\n 
    Here is the user question: \n\n {question}. 
    
    This carefully and objectively assess whether the document contains at least some information that is relevant to the question.
    
    Return DocGraderResponse response, score, that is 'yes' or 'no' score to indicate whether the document contains at least some information that is relevant to the question.
    """


    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]

    # Score each doc
    filtered_docs = []
    agent = "No"
    for d in documents:
        doc_grader_prompt_formatted = doc_grader_prompt.format(
            document=d.page_content, question=question
        )
        result = getLLM(DocGraderResponse).invoke(
            [SystemMessage(content=doc_grader_instructions)]
            + [HumanMessage(content=doc_grader_prompt_formatted)]
        )
        grade = result["score"]
        # Document relevant
        if grade.lower() == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        # Document not relevant
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            # We do not include the document in filtered_docs
            # We set a flag to indicate that we want to run web search
            agent = "Yes"
            continue
    return {"documents": filtered_docs, "agent": agent}


def call_model(state: GraphState):
    """
    Call the LLM model to generate an answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Appended web results to documents
    """

    print("---WEB SEARCH---")
    messages = state["question"]
    response = getLLM(LLMResponse).invoke(messages)
    return { "generation": response }


### Edges
def route_question(state):
    """
    Route question to web search or RAG

    Args:
        state (dict): The current graph state

    Returns:
        str: Next node to call
    """

    print("---ROUTE QUESTION---")
    
    
    route_question = getLLM(RouteResponse).invoke(
        [SystemMessage(content=router_instructions)]
        + [HumanMessage(content=state["question"])]
    )
    source = route_question["datasource"]
    if source == "websearch":
        print("---ROUTE QUESTION TO WEB SEARCH---")
        return "websearch"
    elif source == "vectorstore":
        print("---ROUTE QUESTION TO RAG---")
        return "vectorstore"


def decide_to_generate(state):
    """
    Determines whether to generate an answer, or add web search

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED DOCUMENTS---")
    question = state["question"]
    web_search = state["web_search"]
    filtered_documents = state["documents"]

    if web_search == "Yes":
        # All documents have been filtered check_relevance
        # We will re-generate a new query
        print(
            "---DECISION: NOT ALL DOCUMENTS ARE RELEVANT TO QUESTION, INCLUDE WEB SEARCH---"
        )
        return "websearch"
    else:
        # We have relevant documents, so generate answer
        print("---DECISION: GENERATE---")
        return "generate"


def run_workflow():
    """
    Run the workflow
    """
    workflow = StateGraph(GraphState)


    # Define the nodes
    workflow.add_node("run_agent", call_model)  # web search
    workflow.add_node("retrieve", retrieve_docs)  # retrieve
    workflow.add_node("grade_documents", grade_documents)  # grade documents
    workflow.add_node("generate", generate)  # generate

    # Build graph
    workflow.set_conditional_entry_point(
        route_question,
        {
            "agent": "run_agent",
            "vectorstore": "retrieve",
        },
    )
    workflow.add_edge("run_agent", END)
    workflow.add_edge("retrieve", "grade_documents")
    workflow.add_conditional_edges(
        "grade_documents",
        decide_to_generate,
        {
            "agent": "run_agent",
            "generate": "generate",
        },
    )
    workflow.add_edge(
        "generate",
        END
    )

    # Compile
    graph = workflow.compile()
    display(Image(graph.get_graph().draw_mermaid_png()))



run_workflow()

ImportError: attempted relative import with no known parent package