In [1]:
from typing import List, TypedDict

from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_qdrant import Qdrant
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, StateGraph


In [2]:
# Note: pypdf also records the title, page number, etc. required by chain-5 (this cell loads the PDFs with PyMuPDFLoader)

from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader

# Load every PDF found recursively under the relative "docs" directory,
# using PyMuPDFLoader for each matched file and showing a progress bar.
loader = DirectoryLoader(
    "docs",
    glob="**/*.pdf",
    loader_cls=PyMuPDFLoader,
    show_progress=True,
)
repo_files = loader.load()


100%|███████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.35it/s]


In [3]:

# `repo_files` holds the Document objects returned by the loader, not one entry
# per file: the recorded run shows a single PDF in the progress bar but 13
# entries here (presumably one per page - confirm against the loader's mode).
print(f"Number of pages loaded: {len(repo_files)}")

# Split the loaded documents into chunks of at most 1500 characters, with
# 150 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
documents = text_splitter.split_documents(documents=repo_files)
print(f"Number of documents : {len(documents)}")

Number of files loaded: 13
Number of documents : 63


In [4]:
from langchain_ollama import OllamaEmbeddings
#from langchain_community.vectorstores import Qdrant
from langchain_qdrant import Qdrant

In [5]:
# Name of the embedding model requested from Ollama.
model_name = "nomic-embed-text"
# Embedding function handed to the Qdrant vector store when the collection is reopened.
embeddings = OllamaEmbeddings(model=model_name)

In [6]:
# Restore the vector store from the existing "my_documents" collection stored
# under the local path, reusing the embedding function created earlier.
qdrant = Qdrant.from_existing_collection(
    embeddings,
    collection_name="my_documents",
    path="langchain_local_qdrant_pdf",
)

In [7]:
# Expose the vector store as a retriever (no arguments, so library-default search settings);
# the `retrieve` graph node calls it with the user question.
retriever = qdrant.as_retriever()

In [12]:
# Chat model served by Ollama, created with temperature=0.
local_model = "llama3.2"
llm = ChatOllama(temperature=0, model=local_model)

# Question-answering prompt with two placeholders: the user question and the
# retrieved context that the `generate` node fills in.
prompt = PromptTemplate(
    input_variables=["question", "context"],
    template="""You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise:
    Question: {question} 
    Context: {context} 
    Answer: 
    """,
)

# Pipeline: fill the prompt, call the model, parse the reply with StrOutputParser.
rag_chain = prompt | llm | StrOutputParser()

In [13]:
class GraphState(TypedDict):
    """State dictionary passed between the nodes of the RAG graph."""
    # The user question; read by both `retrieve` and `generate`.
    question: str
    # Answer produced by `generate` (the result of `rag_chain.invoke`).
    generation: str
    # Documents returned by the retriever in `retrieve`.
    documents: List[Document]

def retrieve(state: GraphState):
    """
    Graph node: fetch documents for the current question.

    Reads ``state["question"]``, passes it to the module-level ``retriever``
    and returns a state update containing the retrieved documents together
    with the unchanged question.
    """
    print("---RETRIEVE---")
    query = state["question"]
    return {"documents": retriever.invoke(query), "question": query}

def generate(state: GraphState):
    """
    Graph node: answer the question from the retrieved documents.

    Joins the ``page_content`` of every document in ``state["documents"]``
    with blank lines, feeds that context and the question to the
    module-level ``rag_chain``, and returns the documents, the question and
    the chain's result under ``"generation"``.
    """
    print("---GENERATE---")
    query = state["question"]
    retrieved = state["documents"]

    # One blank line between the contents of consecutive documents.
    context_parts = [doc.page_content for doc in retrieved]
    answer = rag_chain.invoke(
        {"context": "\n\n".join(context_parts), "question": query}
    )
    return {"documents": retrieved, "question": query, "generation": answer}


In [14]:
# Assemble the linear RAG pipeline: START -> retrieve -> generate -> END.
workflow = StateGraph(GraphState)

# Register the nodes first so every edge below refers to an already-known node.
workflow.add_node("retrieve", retrieve)
workflow.add_node("generate", generate)

# Wire the nodes together. The explicit edge to END marks "generate" as the
# terminal node instead of leaving it without any outgoing edge.
workflow.add_edge(START, "retrieve")
workflow.add_edge("retrieve", "generate")
workflow.add_edge("generate", END)

<langgraph.graph.state.StateGraph at 0x78d38059a6d0>

In [15]:
# Add memory for checkpointing: a MemorySaver instance is passed to compile() as the checkpointer
memory = MemorySaver()

# Compile the workflow into `app`, the object that the test cell streams a question through
app = workflow.compile(checkpointer=memory)

In [None]:
# Run a test question through the compiled graph
import uuid

# The compiled graph uses a checkpointer, so each run is identified by a
# "thread_id". Pass it as a string (not a uuid.UUID object) so it matches the
# string thread ids the checkpointer API is documented with.
thread_id = str(uuid.uuid4())
config = {"configurable": {"thread_id": thread_id}}

# Only the question is supplied; the graph nodes fill in the other state keys.
state = {"question": "What is agent memory?"}
output = app.stream(state, config)
print("---FINAL OUTPUT STREAM---")
# Each streamed item is printed as it arrives; `step` still holds the last
# item after the loop and is displayed by the next cell.
for step in output:
    print(step)

In [None]:
# Display the last item yielded by the stream loop in the previous cell
# (`step` is that loop's variable, so the previous cell must have been run first).
step