In [None]:
from datasets import load_dataset
from langchain.schema import Document

# Hugging Face Hub identifier of the bail-judgment dataset. Fetching it may
# require being logged in through the Hugging Face CLI.
dataset_name = "SnehaDeshmukh/IndianBailJudgments-1200"
dataset = load_dataset(dataset_name)

In [22]:
# Display the loaded DatasetDict to inspect its splits, column names and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['case_id', 'case_title', 'court', 'date', 'judge', 'ipc_sections', 'bail_type', 'bail_cancellation_case', 'landmark_case', 'accused_name', 'accused_gender', 'prior_cases', 'bail_outcome', 'bail_outcome_label_detailed', 'crime_type', 'facts', 'legal_issues', 'judgment_reason', 'summary', 'bias_flag', 'parity_argument_used', 'legal_principles_discussed', 'region', 'source_filename', 'special_laws'],
        num_rows: 1200
    })
})

In [23]:
# Columns whose text is concatenated (in this order) to form each document body.
TEXT_FIELDS = ("facts", "legal_issues", "judgment_reason", "summary")


def _clean_text(value):
    """Return ``value`` as stripped text, mapping ``None`` to an empty string.

    ``row.get(key, "")`` only falls back to ``""`` when the key is absent; a
    column that is present but null yields ``None``, which has no ``.strip()``.
    """
    if value is None:
        return ""
    return str(value).strip()


docs = []

for split in dataset.keys():
    for row in dataset[split]:
        parts = [_clean_text(row.get(field)) for field in TEXT_FIELDS]

        # Blank sections are dropped so they do not leave stray separators.
        text = "\n\n".join(part for part in parts if part)
        if not text:
            # Nothing to embed for this row; skip rather than index an empty document.
            continue

        # The dataset's identifier column is "case_id" (there is no "id"
        # column), so reading "id" would always store None. The metadata key
        # stays "id" so downstream consumers see the same schema.
        metadata = {"id": row.get("case_id"), "split": split}
        docs.append(Document(page_content=text, metadata=metadata))

In [25]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters: maximum chunk size and the overlap kept between
# consecutive chunks.
splitter_options = {"chunk_size": 2000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break every document in `docs` into chunks.
splits = text_splitter.split_documents(docs)

In [26]:
# Sanity check: print the first two chunks produced by the splitter.
for chunk_index in (0, 1):
    print(splits[chunk_index])

page_content='Jibangshu Paul was apprehended carrying Rs. 32,11,000 in cash, suspected to be intended for the DHD(J) militant group. He was earlier granted bail for IPC sections. Later, serious sections under the Unlawful Activities (Prevention) Act were added, and NIA sought his re-arrest. The Special Court directed custody, rejecting his prayer to continue on earlier bail.

['Whether fresh bail is needed when new, more serious penal sections are added', 'Whether cancellation of earlier bail is required due to lack of jurisdiction', 'Powers of the Special Court under NIA Act to take cognizance and grant bail']

The court held that newly added serious UA(P) Act offences required separate consideration and the Special Judge could not continue prior bail orders granted under IPC sections. Hence, fresh bail was required, and custody was ordered.

Bail earlier granted to Jibangshu Paul under IPC sections was cancelled after UA(P) Act sections were added. Court ruled fresh bail was needed f

In [27]:
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings 

import os

# The API key is read from the GOOGLE_API_KEY environment variable so the
# secret is never stored in (or committed with) the notebook. A missing
# variable fails fast with a KeyError here.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
    google_api_key=os.environ["GOOGLE_API_KEY"],
)

# Embed every chunk and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

In [28]:
# Peek at a handful of stored records through the public Chroma.get API
# instead of printing the entire collection via the private `_collection`
# attribute, which floods the cell output with every indexed chunk.
vectorstore.get(limit=3)



In [29]:
# Expose the vector store through the retriever interface, using the default
# retriever settings, so it can feed the RAG chain's "context" input.
retriever = vectorstore.as_retriever()

In [30]:
from langchain import hub
# Identifier of the prompt fetched from the LangChain Hub.
rag_prompt_id = "rlm/rag-prompt"
prompt = hub.pull(rag_prompt_id)



In [33]:
from langchain_google_genai import ChatGoogleGenerativeAI

import os

# The API key is read from the GOOGLE_API_KEY environment variable so the
# secret is never stored in (or committed with) the notebook. A missing
# variable fails fast with a KeyError here.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    google_api_key=os.environ["GOOGLE_API_KEY"],
)

In [34]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [35]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined by a single newline, in the order
        given; an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n".join(contents)

In [36]:
# Inputs for the prompt: "context" is the retriever's output passed through
# format_docs, and "question" is the incoming query forwarded unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build inputs -> fill prompt -> call the LLM -> parse to a string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

In [37]:
# Run one sample question through the chain; the answer is shown as the cell output.
question = "Under what conditions did courts deny bail for murder accused?"
rag_chain.invoke(question)

'Courts deny bail for murder accused when there is a risk of witness intimidation, a history of criminal activity, or a threat to public safety. Bail can also be denied if there are reasonable grounds to believe the accused is guilty of an offense punishable by death or life imprisonment. Additionally, bail granted for lesser charges does not automatically extend when charges are enhanced to capital offenses.'