In [1]:
from langchain_ollama import ChatOllama

llm = ChatOllama(
    model="llama3",
    temperature=0,
)

In [2]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="nomic-embed-text")

In [3]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

In [4]:
from langchain_community.document_loaders import PDFMinerLoader

loader = PDFMinerLoader("/home/sneha/langchain-rag/IndianConstitution_Eng.pdf")
docs = loader.load()

In [5]:

assert len(docs) == 1
print(f"Total characters: {len(docs[0].page_content)}")

Total characters: 800392


In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents.")

Split blog post into 1065 sub-documents.


In [7]:
from tqdm import tqdm
from langchain_community.vectorstores import Chroma

# Create or load your Chroma vector store
vector_store = Chroma(embedding_function=embeddings, persist_directory="chroma_db")


# Add documents in batches with a progress bar
for i in tqdm(range(0, len(all_splits)), desc="Indexing chunks"):
    chunk = [all_splits[i]]
    vector_store.add_documents(chunk)

# Save the vector store to disk
vector_store.persist()


  vector_store = Chroma(embedding_function=embeddings, persist_directory="chroma_db")
Indexing chunks: 100%|██████████| 1065/1065 [05:11<00:00,  3.42it/s]
  vector_store.persist()


In [8]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    {"context": "(context goes here)", "question": "(question goes here)"}
).to_messages()

assert len(example_messages) == 1
print(example_messages[0].content)



You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: (question goes here) 
Context: (context goes here) 
Answer:


In [9]:
from langchain_core.documents import Document
from typing_extensions import List, TypedDict


class State(TypedDict):
    question: str
    context: List[Document]
    answer: str

In [10]:
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}

In [11]:
from langgraph.graph import START, StateGraph

graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [13]:
result = graph.invoke({"question": "What are some law related to women?"})

print(f'Context: {result["context"]}\n\n')
print(f'Answer: {result["answer"]}')

Context: [Document(metadata={'producer': 'Acrobat Distiller 5.0 (Windows)', 'start_index': 46605, 'creationdate': '2005-11-11T12:20:49+05:30', 'total_pages': 291, 'source': '/home/sneha/langchain-rag/IndianConstitution_Eng.pdf', 'moddate': '2005-11-16T12:43:55+05:30', 'title': 'PREFACE', 'creator': 'Acrobat PDFMaker 5.0 for Word', 'author': 'New user'}, page_content='(3) In this article, unless the context otherwise requires,— \n\n(a)  “law” \n\nincludes  any  Ordinance,  order,  bye-law,  rule,  regulation, \n\nnotification, custom or usage having in the territory of India the force of law; \n\n(b)  “laws  in  force”  includes  laws  passed  or  made  by  a  Legislature  or  other \ncompetent  authority  in  the  territory  of  India  before  the  commencement  of  this \nConstitution and not previously repealed, notwithstanding that any such law or \nany part thereof may not be then in operation either at all or in particular areas. \n(4) Nothing in this article shall apply to any am

In [14]:
result = graph.invoke({"question": "What are some law related to men protection from women. Are indian law weak for men?"})

print(f'Context: {result["context"]}\n\n')
print(f'Answer: {result["answer"]}')

Context: [Document(metadata={'source': '/home/sneha/langchain-rag/IndianConstitution_Eng.pdf', 'moddate': '2005-11-16T12:43:55+05:30', 'total_pages': 291, 'producer': 'Acrobat Distiller 5.0 (Windows)', 'start_index': 46605, 'author': 'New user', 'creationdate': '2005-11-11T12:20:49+05:30', 'title': 'PREFACE', 'creator': 'Acrobat PDFMaker 5.0 for Word'}, page_content='(3) In this article, unless the context otherwise requires,— \n\n(a)  “law” \n\nincludes  any  Ordinance,  order,  bye-law,  rule,  regulation, \n\nnotification, custom or usage having in the territory of India the force of law; \n\n(b)  “laws  in  force”  includes  laws  passed  or  made  by  a  Legislature  or  other \ncompetent  authority  in  the  territory  of  India  before  the  commencement  of  this \nConstitution and not previously repealed, notwithstanding that any such law or \nany part thereof may not be then in operation either at all or in particular areas. \n(4) Nothing in this article shall apply to any am

In [None]:
result = graph.invoke({"question": "What are some law related to men protection from women. Are indian law weak for men?"})

print(f'Context: {result["context"]}\n\n')
print(f'Answer: {result["answer"]}')

Context: [Document(metadata={'source': '/home/sneha/langchain-rag/IndianConstitution_Eng.pdf', 'moddate': '2005-11-16T12:43:55+05:30', 'total_pages': 291, 'producer': 'Acrobat Distiller 5.0 (Windows)', 'start_index': 46605, 'author': 'New user', 'creationdate': '2005-11-11T12:20:49+05:30', 'title': 'PREFACE', 'creator': 'Acrobat PDFMaker 5.0 for Word'}, page_content='(3) In this article, unless the context otherwise requires,— \n\n(a)  “law” \n\nincludes  any  Ordinance,  order,  bye-law,  rule,  regulation, \n\nnotification, custom or usage having in the territory of India the force of law; \n\n(b)  “laws  in  force”  includes  laws  passed  or  made  by  a  Legislature  or  other \ncompetent  authority  in  the  territory  of  India  before  the  commencement  of  this \nConstitution and not previously repealed, notwithstanding that any such law or \nany part thereof may not be then in operation either at all or in particular areas. \n(4) Nothing in this article shall apply to any am