In [97]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import ollama
import chromadb
import json

In [17]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Re-export the LangChain tracing settings. The original assigned
# os.getenv(...) straight into os.environ, which raises TypeError when a
# variable is absent (os.getenv returns None). Only values that are present
# are re-exported; anything missing is reported instead of crashing the cell.
missing_env_vars = []
for env_name in (
    'LANGCHAIN_TRACING_V2',
    'LANGCHAIN_ENDPOINT',
    'LANGCHAIN_API_KEY',
    'LANGCHAIN_PROJECT',
):
    env_value = os.getenv(env_name)
    if env_value is None:
        missing_env_vars.append(env_name)
    else:
        os.environ[env_name] = env_value

if missing_env_vars:
    print(f"Missing environment variables (not set): {', '.join(missing_env_vars)}")

In [86]:
# Load the IPC PDF and collect the text of every document the loader returns.
loader = PyPDFLoader("../common_files/IPC.pdf")
pages = loader.load_and_split()
all_pages_text = []  # kept for compatibility; not used by the cells visible here
full_pdf_text = [page.page_content for page in pages]

In [87]:
# Splitter settings gathered in one place so they are easy to tune.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 100,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

In [88]:
# Turn the collected page texts into chunked Document objects and report
# how many chunks were produced.
texts = text_splitter.create_documents(full_pdf_text)
chunk_count = len(texts)
print(chunk_count)

620


In [89]:
# Display the first five chunks as a quick sanity check of the split.
texts[:5]

[Document(page_content='1 \n THE INDIAN PENAL CODE  \n___________  \nARRANGEMENT OF SECTIONS  \n__________  \nCHAPTER I  \nINTRODUCTION  \nPREAMBLE  \nSECTIONS  \n1. Title and extent of operation of the Code.  \n2. Punishment of offences committed within India.  \n3. Punishment of offences committed beyond, but which by law may be tried within, India.  \n4. Extension of Code to extra -territorial offences.  \n5. Certain laws not to be affected by this Act.  \nCHAPTER II  \nGENERAL  EXPLANATIONS  \n6. Definitions in the Code to be understood subject to exceptions.  \n7. Sense of expression once explained.  \n8. Gender.  \n9. Number.  \n10. “Man”.  “Woman”.  \n11. “Person”.  \n12.  “Public”.  \n13. [Omitted .]. \n14. “Servant of Government”.  \n15. [Repealed. ]. \n16. [Repealed .]. \n17. “Government”.  \n18. “India”.  \n19. “Judge”.  \n20. “Court of Justice”.  \n21. “Public  servant”.  \n22. “Moveable property”.  \n23. “Wrongful gain”.  \n“Wrongful loss”.  \nGainin g wrongfully/ Losing w

In [95]:
# Create the Chroma client and the collection that will hold the IPC chunks.
client = chromadb.Client()
# get_or_create_collection keeps this cell re-runnable: create_collection
# raises when a collection named "ipc_docs" already exists on the client.
collection = client.get_or_create_collection(name="ipc_docs")

In [100]:
# Embed every chunk and store it in the vector database.
# The ids are the chunk positions (as strings), so re-running this cell
# rewrites the same records via upsert instead of trying to add duplicates.
chunk_ids = []
chunk_embeddings = []
chunk_documents = []
for i, d in enumerate(texts):
  response = ollama.embeddings(model="mxbai-embed-large", prompt=d.page_content)
  chunk_ids.append(str(i))
  chunk_embeddings.append(response["embedding"])
  chunk_documents.append(d.page_content)

# One write for all chunks rather than one collection call per chunk.
# Skipped entirely when there is nothing to store.
if chunk_ids:
  collection.upsert(
    ids=chunk_ids,
    embeddings=chunk_embeddings,
    documents=chunk_documents
  )

In [103]:
# Example question to answer from the indexed document.
prompt = "What is Punishment for dacoity?"

# Embed the question with the same model used for the stored chunks, then
# ask the collection for the single closest document.
response = ollama.embeddings(model="mxbai-embed-large", prompt=prompt)
query_embedding = response["embedding"]
results = collection.query(query_embeddings=[query_embedding], n_results=1)
data = results['documents']

In [104]:
# Display the retrieved documents (the value of results['documents']).
data

[['imprisonment with which such offender shall be punished shal l not be less than seven years.  \n399. Making preparation to commit dacoity .—Whoever makes any preparation for committing \ndacoity, shall be punished with rigorous imprisonment for a term which may extend to ten years, an d shall  \nalso be liable to fine.']]

In [106]:
# Generate a response combining the prompt and the retrieved documents.
# results['documents'] is a list of lists, so interpolating it directly puts
# Python list syntax (brackets, quotes, escaped "\n") into the prompt. Join
# the documents of the first entry into plain text instead.
context = "\n\n".join(results['documents'][0])
output = ollama.generate(
  model="llama3",
  prompt=f"Using this data: {context}. Respond to this prompt: {prompt}"
)

print(output['response'])

According to the data provided, there are two related punishments mentioned:

1. For "imprisonment with which such offender shall be punished shal l not be less than seven years."
2. For "Making preparation to commit dacoity" - The punishment is:
	* Rigorous imprisonment for a term that may extend up to 10 years.
	* Also liable to fine.

Note that these punishments are not directly related to the act of dacoity itself, but rather to the making of preparations to commit it and the actual commission of the crime.
