In [1]:
%pip install -qU langchain_community pypdf

Note: you may need to restart the kernel to use updated packages.


In [12]:
from dotenv import load_dotenv
load_dotenv()

from langchain_community.document_loaders import PyPDFLoader

filename = "/home/james/projects/learnings/AI_Learnings/langchain/langgraph-test/data_sources/2025q1-alphabet-earnings-release.pdf"
loader = PyPDFLoader(file_path=filename)
docs = []
docs = loader.load()

In [13]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=30,
    add_start_index=True)

all_splits = text_splitter.split_documents(docs)

print(f"Split the pdf into {len(all_splits)} sub-documents.")

Split the pdf into 17 sub-documents.


In [14]:
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

# embedding model
embed_model = OpenAIEmbeddings(model="text-embedding-3-large")

# vector storage
vector_store = InMemoryVectorStore(embedding=embed_model)

# add document chunks to the vector store to get index chunks
document_ids = vector_store.add_documents(documents=all_splits)

In [15]:
from typing import TypedDict, List
from IPython.display import Image
from langchain import hub
from langchain_core.documents import Document
from langgraph.graph import START, END, StateGraph
from langchain_openai import ChatOpenAI


prompt = hub.pull('rlm/rag-prompt')

llm = ChatOpenAI(model='gpt-4o')

class State(TypedDict):
    question: str
    context: List[Document]
    answer: str

def retrieve(state: State):
    print("--Retrieving Information--")
    question = state['question']
    retrieved_docs = vector_store.similarity_search(query=question,k=4)
    return {'context': retrieved_docs}

def generate(state: State):
    print("--Generating Response--")
    docs_contents =  "\n\n".join([doc.page_content for doc in state["context"]])
    messages = prompt.invoke({'question': state['question'], 'context': state['context']})
    response = llm.invoke(messages)
    return {'answer': response}
        
builder = StateGraph(State)

builder.add_node("retrieve", retrieve)
builder.add_node("generate", generate)

builder.add_edge(START, "retrieve")
builder.add_edge("retrieve", "generate")
builder.add_edge("generate", END)

graph = builder.compile()

Image(graph.get_graph().draw_mermaid_png())



ValueError: Failed to reach https://mermaid.ink/ API while trying to render your graph after 1 retries. To resolve this issue:
1. Check your internet connection and try again
2. Try with higher retry settings: `draw_mermaid_png(..., max_retries=5, retry_delay=2.0)`
3. Use the Pyppeteer rendering method which will render your graph locally in a browser: `draw_mermaid_png(..., draw_method=MermaidDrawMethod.PYPPETEER)`