In [18]:
import json
import os

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [2]:
# Locations of the training data (JSONL) and of the persisted Chroma index.
# Both can be overridden through environment variables so the notebook can run
# on another machine without editing this cell; the defaults are the original
# absolute paths, so behaviour is unchanged when the variables are not set.
input_file = os.environ.get(
    "IPC_INPUT_FILE",
    "/Users/shreyasb/worskpace/shreyas/python/mtech-dissertation/mistral-7b/data/ipc-train-1.jsonl",
)
db_path = os.environ.get(
    "IPC_CHROMA_DB_PATH",
    "/Users/shreyasb/worskpace/shreyas/python/mtech-dissertation/rag/chromadb",
)

In [3]:
def extract_content(record):
    """Concatenate the system and user message texts of one chat record.

    Args:
        record: Mapping with a ``'messages'`` list; every message is a mapping
            with ``'role'`` and ``'content'`` keys.

    Returns:
        The ``'content'`` of each message whose role is ``'system'`` or
        ``'user'``, in their original order, joined by a single space, with
        leading and trailing whitespace removed. Messages with any other role
        are ignored; a record with no matching message yields ``""``.
    """
    wanted_roles = ('system', 'user')
    parts = [
        msg['content']
        for msg in record['messages']
        if msg['role'] in wanted_roles
    ]
    return " ".join(parts).strip()

# Load the JSONL file: one JSON object per line.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale, and whitespace-only lines (e.g. a trailing blank
# line) are skipped instead of crashing json.loads.
with open(input_file, 'r', encoding='utf-8') as file:
    data = [json.loads(line) for line in file if line.strip()]

# Extract content from each record
texts = [extract_content(record) for record in data]

# Split texts into chunks.
# All records are joined with blank lines and split as ONE text.
# NOTE(review): a chunk may therefore contain text from adjacent records -
# confirm that this is intended rather than splitting each record separately.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_text("\n\n".join(texts))

In [4]:
# Embedding model shared by the whole notebook: the same object is handed to
# Chroma when the index is built and again when the persisted index is reopened.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

  from tqdm.autonotebook import tqdm, trange


In [8]:
from tqdm import tqdm

# Create and persist the vector store in batches
batch_size = 1000

# Open (or create) the persistent collection once, using the same constructor
# the notebook uses later to reopen it. This removes the None-sentinel branch
# and leaves a usable (empty) store even when `chunks` is empty.
vectorstore = Chroma(persist_directory=db_path, embedding_function=embeddings)

for i in tqdm(range(0, len(chunks), batch_size)):
    batch = chunks[i:i+batch_size]

    # Ids are derived from each chunk's position in `chunks`. With random ids
    # every re-run of this cell appended another copy of all chunks to the
    # persisted store; with stable ids a re-run overwrites the same entries
    # (Chroma.add_texts upserts by id).
    # NOTE(review): a store that was already populated with auto-generated ids
    # still keeps those entries - delete the directory at `db_path` once to
    # start from a clean index.
    batch_ids = [f"chunk-{i + offset}" for offset in range(len(batch))]
    vectorstore.add_texts(batch, ids=batch_ids)

100%|██████████| 97/97 [14:51<00:00,  9.19s/it]


In [36]:
# Reopen the persisted Chroma index with the same embedding model.
vectorstore = Chroma(persist_directory=db_path, embedding_function=embeddings)

# Create a retriever that returns the 3 best-matching chunks per query
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Set up the language model.
# No key literal is passed: ChatOpenAI reads the OPENAI_API_KEY environment
# variable when api_key is omitted, which keeps credentials out of the notebook.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Define the system prompt.
# It must exist at module level before the template below is built; previously
# the name was only assigned inside answer_question, so this cell raised
# NameError on a fresh kernel. The text is the same instruction block that
# answer_question uses.
system_prompt = """You are an AI assistant specialized in Indian Penal Code (IPC). When asked about relevant IPC sections for a given case, respond ONLY with the section numbers in the following format:

Section X of the Indian Penal Code, Section Y of the Indian Penal Code, ...

Do not provide any explanations or additional text. Only list the relevant IPC sections in the specified format."""

# NOTE(review): the text has no {placeholders}, so this template takes no input
# variables, and nothing visible in the notebook consumes `prompt_template` -
# confirm whether it is still needed.
prompt_template = PromptTemplate.from_template(system_prompt)

In [34]:
# Assemble the retrieval QA chain from the module-level `llm` and `retriever`.
# No custom prompt is passed in this call, so the instruction text has to be
# supplied by the caller as part of the query.
# Source documents are requested so callers can read the retrieved chunks from
# the chain's result.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)


In [37]:
def answer_question(question):
    """Ask the RAG chain which IPC sections apply to a case description.

    The fixed instruction text below is prepended to ``question`` and the
    combined string is sent to the module-level ``qa_chain`` as its ``query``.

    Args:
        question: Text of the case to be analysed.

    Returns:
        A tuple ``(answer, sources)`` where ``answer`` is the chain's
        ``'result'`` value and ``sources`` is a list holding the
        ``page_content`` of every entry in the chain's ``'source_documents'``.
    """
    system_prompt = """You are an AI assistant specialized in Indian Penal Code (IPC). When asked about relevant IPC sections for a given case, respond ONLY with the section numbers in the following format:

Section X of the Indian Penal Code, Section Y of the Indian Penal Code, ...

Do not provide any explanations or additional text. Only list the relevant IPC sections in the specified format."""
    # NOTE(review): the instructions travel inside the query string, so they are
    # presumably also part of the text the retriever searches with - confirm
    # that this is intended.
    prompt_question = f"""
{system_prompt}
{question}"""
    # Use Chain.invoke: calling the chain object directly (Chain.__call__) is
    # deprecated in current LangChain releases; invoke takes the same input
    # dict and returns the same result dict.
    result = qa_chain.invoke({"query": prompt_question})
    answer = result['result']
    sources = [doc.page_content for doc in result['source_documents']]
    return answer, sources

In [38]:
# Example usage
question = "The State has come up in revision against order-dated 20.09.1999 passed by Sessions Judge, Sehore in S.T. No.126\/1999 whereby holding that no offence under Section 307 of the IPC was made out, he proceeded to transfer the case, under Section 228(1) of the Code of Criminal Procedure (for short 'the Code'), to Chief Judicial Magistrate, Sehore for trial.The respondents were charge-sheeted for the offences under Sections 147, 148 and 307 read with S.149 of the IPC upon the allegations that being armed with deadly weapons like gupti and lohangi, they constituted an unlawful assembly and in furtherance of the common object thereof, jointly assaulted complainant Shafique, his elder brothers Lateef and Haneef, mother Hoora Bi and Bhabhi Shabra Bi and abdominal injury sustained by Lateef was characterized by the medical expert as dangerous to life.:: 2 ::A bare perusal of the corresponding operative finding would reveal that the abdominal injury had resulted in peritoneal tear, omentum tear and splenic contusion.Learned Sessions Judge, after hearing the parties, took the view that in absence of expert opinion that the injury received by Lateef was sufficient in the ordinary course of nature to cause death, charge of attempt to murder could not be framed.However, this reasoning was apparently misconceived in view of the well- settled position of law on the point as explained by the Apex Court in State of Maharashtra v. 
Balram Bama Patil AIR 1983 SCThe impugned order, therefore, deserves interference.Learned counsel for the respondents still contended that the Sessions Judge, while declining to frame charge of the offence under Section 307 of the IPC, had transferred the counter case to Chief Judicial Magistrate for trial and the corresponding order has attained finality.\"The charge in each criminal case is framed on the basis of materials available in the records of that particular case.Merely because the charge for offence under Section 307, IPC has not been framed in the counter case, the petitioners do not become entitled to be discharged for the offence under Section 307, IPC, if they are otherwise liable to be charged for the offence under that section in view of the materials placed before the learned Judge.\"In the result, the revision stands allowed and the order- dated 20.09.1999 is hereby set aside.Revision allowed.(R.C.MISHRA) JUDGE 29.06.2010"
# Run the example case through the chain, then echo the question, the model's
# answer and each retrieved source chunk (numbered from 1).
answer, sources = answer_question(question)

print("Question:", question)
print("\nAnswer:", answer)
print("\nSources:")
for rank, text in enumerate(sources, start=1):
    # One call with sep="\n" prints the header line and the chunk on separate
    # lines, exactly like two consecutive print calls.
    print(f"\nSource {rank}:", text, sep="\n")

Question: The State has come up in revision against order-dated 20.09.1999 passed by Sessions Judge, Sehore in S.T. No.126\/1999 whereby holding that no offence under Section 307 of the IPC was made out, he proceeded to transfer the case, under Section 228(1) of the Code of Criminal Procedure (for short 'the Code'), to Chief Judicial Magistrate, Sehore for trial.The respondents were charge-sheeted for the offences under Sections 147, 148 and 307 read with S.149 of the IPC upon the allegations that being armed with deadly weapons like gupti and lohangi, they constituted an unlawful assembly and in furtherance of the common object thereof, jointly assaulted complainant Shafique, his elder brothers Lateef and Haneef, mother Hoora Bi and Bhabhi Shabra Bi and abdominal injury sustained by Lateef was characterized by the medical expert as dangerous to life.:: 2 ::A bare perusal of the corresponding operative finding would reveal that the abdominal injury had resulted in peritoneal tear, omen