In [None]:
# Create the folder structure for all four legal acts
!mkdir /content/divorce
!mkdir /content/copyright
!mkdir /content/consumer_protection
!mkdir /content/inheritance

In [None]:
# installing all necessary packages
# NOTE(review): no versions are pinned, so a fresh run can resolve to different
# releases than the ones recorded in the saved output (llama-index 0.12.20) —
# consider pinning.
# LlamaIndex meta-package and its core library.
%pip install llama-index
%pip install llama-index-core
# Hugging Face embedding integration (used for HuggingFaceEmbedding).
%pip install llama-index-embeddings-huggingface
# File readers (used by SimpleDirectoryReader).
%pip install llama-index-readers-file
# NOTE(review): these three packages and llama-index-readers-web are not
# imported directly by the visible cells — confirm they are still needed.
%pip install transformers accelerate bitsandbytes
%pip install llama-index-readers-web
# NVIDIA LLM and embedding integrations; llama-index-readers-file is listed a
# second time here, this time with --upgrade.
%pip install --upgrade --quiet llama-index-llms-nvidia llama-index-embeddings-nvidia llama-index-readers-file

Collecting llama-index
  Downloading llama_index-0.12.20-py3-none-any.whl.metadata (12 kB)
Collecting llama-index-agent-openai<0.5.0,>=0.4.0 (from llama-index)
  Downloading llama_index_agent_openai-0.4.6-py3-none-any.whl.metadata (727 bytes)
Collecting llama-index-cli<0.5.0,>=0.4.0 (from llama-index)
  Downloading llama_index_cli-0.4.0-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core<0.13.0,>=0.12.20 (from llama-index)
  Downloading llama_index_core-0.12.20-py3-none-any.whl.metadata (2.6 kB)
Collecting llama-index-embeddings-openai<0.4.0,>=0.3.0 (from llama-index)
  Downloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl.metadata (684 bytes)
Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama-index)
  Downloading llama_index_indices_managed_llama_cloud-0.6.8-py3-none-any.whl.metadata (3.6 kB)
Collecting llama-index-llms-openai<0.4.0,>=0.3.0 (from llama-index)
  Downloading llama_index_llms_openai-0.3.22-py3-none-any.whl.metadata (3.3 kB)
Collec

In [None]:
# Importing necessary packages
import pandas as pd
import numpy as np

from llama_index.core import Settings
from llama_index.core import Document
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

import logging
import sys
import os
import getpass
from IPython.display import Markdown, display
from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    Response
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.response.notebook_utils import display_source_node
from llama_index.llms.nvidia import NVIDIA
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.query_engine import TransformQueryEngine
from llama_index.core.evaluation import RelevancyEvaluator
from llama_index.core.evaluation import FaithfulnessEvaluator
from llama_index.core.evaluation import SemanticSimilarityEvaluator
from llama_index.core.evaluation import CorrectnessEvaluator
import nest_asyncio
from tqdm.asyncio import tqdm_asyncio

In [None]:
# Make sure an NVIDIA API key is present in the environment, prompting for one
# only when no key with the expected prefix is already set.
# To force a new prompt, run:  del os.environ['NVIDIA_API_KEY']
key_prefix = "nvapi-"
if not os.environ.get("NVIDIA_API_KEY", "").startswith(key_prefix):
    # getpass keeps the typed key out of the notebook output.
    nvapi_key = getpass.getpass("NVAPI Key (starts with nvapi-): ")
    assert nvapi_key.startswith(key_prefix), f"{nvapi_key[:5]}... is not a valid key"
    os.environ["NVIDIA_API_KEY"] = nvapi_key
else:
    print("Valid NVIDIA_API_KEY already in environment. Delete to reset")

NVAPI Key (starts with nvapi-): ··········


In [None]:
# Patch asyncio so an event loop can be re-entered while the notebook's own
# loop is already running.
# NOTE(review): presumably required by the synchronous query/evaluator calls
# made later in this notebook — confirm.
nest_asyncio.apply()

In [None]:
# Answer-generation model served through the NVIDIA endpoint.
# The constructor selects the model through its `model` argument; the previous
# `llm=` keyword is not that argument, so the DeepSeek-R1 model named here was
# never actually selected.
llm = NVIDIA(model="deepseek-ai/deepseek-r1")
# Judge model for the LLM-based evaluators. No model is named, so the
# integration's default model is used.
evaluation_llm = NVIDIA()

In [None]:
# Load the BAAI/bge-small-en-v1.5 embedding model through the Hugging Face
# integration (the saved output shows the model files being downloaded).
# No Hugging Face token is passed in this call.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Global settings for the LLM and embedding model: register both on the
# library-wide Settings object.
# NOTE(review): components created later without an explicit llm/embed_model
# presumably pick these up as their defaults — confirm.
Settings.llm = llm
Settings.embed_model = embed_model

In [None]:
# Evaluators used to score every generated answer.
# The three LLM-judged evaluators (relevancy, faithfulness, correctness) are
# given `evaluation_llm` explicitly rather than the generation model.
relevancy_evaluator = RelevancyEvaluator(llm=evaluation_llm)
faithfulness_evaluator = FaithfulnessEvaluator(llm=evaluation_llm)
# Built with no arguments.
# NOTE(review): presumably falls back to the globally configured embedding
# model for the similarity computation — confirm.
similarity_evaluator = SemanticSimilarityEvaluator()
correctness_evaluator = CorrectnessEvaluator(llm=evaluation_llm)

In [None]:
# Ingestion for the four legal acts: read each act's folder, split the text
# into small overlapping chunks, embed the chunks, and build one in-memory
# vector index per act.

def _read_folder(folder):
    """Create a directory reader for `folder` and load its documents.

    Returns the reader together with the documents it loaded.
    """
    reader = SimpleDirectoryReader(folder)
    return reader, reader.load_data()


def _chunk_and_report(label, documents):
    """Run the ingestion pipeline on `documents`, print `label` followed by
    the number of nodes produced, and return those nodes."""
    produced_nodes = pipeline.run(documents=documents)
    print(label, len(produced_nodes))
    return produced_nodes


# Readers and raw documents, one pair per legal act.
doc_loader1, documents1 = _read_folder("/content/copyright")
doc_loader2, documents2 = _read_folder("/content/divorce")
doc_loader3, documents3 = _read_folder("/content/consumer_protection")
doc_loader4, documents4 = _read_folder("/content/inheritance")

# Splitter configured with a chunk size of 80 and an overlap of 20.
text_splitter = SentenceSplitter(separator=" ", chunk_size=80, chunk_overlap=20)

# The pipeline first splits the documents, then applies the embedding model.
pipeline = IngestionPipeline(transformations=[text_splitter, embed_model])

# Chunk every corpus and report how many nodes each one produced.
nodes1 = _chunk_and_report("Number of nodes for copyright:", documents1)
nodes2 = _chunk_and_report("Number of nodes for divorce:", documents2)
nodes3 = _chunk_and_report("Number of nodes for consumer protection:", documents3)
nodes4 = _chunk_and_report("Number of nodes for inheritance:", documents4)

# One vector index per legal act, built from that act's nodes.
index_copyright = VectorStoreIndex(nodes=nodes1)
index_divorce = VectorStoreIndex(nodes=nodes2)
index_consumer_protection = VectorStoreIndex(nodes=nodes3)
index_inheritance = VectorStoreIndex(nodes=nodes4)

Number of nodes for copyright: 18
Number of nodes for divorce: 72
Number of nodes for consumer protection: 16
Number of nodes for inheritance: 47


In [None]:
# Load the evaluation question sets, one worksheet per legal act.
# Passing a list of sheet names makes pandas open and parse the workbook once
# and return a {sheet name: DataFrame} dict, instead of re-reading the same
# file for every sheet.
evaluation_sheets = pd.read_excel(
    "/content/Evaluation_Dataset.xlsx",
    sheet_name=["Copyright", "Divorce", "CP", "Inheritance"],
)
df_copyright = evaluation_sheets["Copyright"]
df_divorce = evaluation_sheets["Divorce"]
df_cp = evaluation_sheets["CP"]
df_inheritance = evaluation_sheets["Inheritance"]

## HyDE Retrieval

In [None]:
def _hyde_engine_parts(vector_index):
    """Build the three HyDE pieces for one vector index.

    Returns a tuple of: the index's default query engine, a HyDE query
    transform created with include_original=True, and the query engine that
    wraps the first with the second.
    """
    base_engine = vector_index.as_query_engine()
    transform = HyDEQueryTransform(include_original=True)
    return base_engine, transform, TransformQueryEngine(base_engine, transform)


# One HyDE-wrapped query engine per legal act (1 = copyright, 2 = divorce,
# 3 = consumer protection, 4 = inheritance).
query_engine_copyright1, hyde_copyright1, hyde_query_engine1 = _hyde_engine_parts(index_copyright)
query_engine_divorce1, hyde_divorce1, hyde_query_engine2 = _hyde_engine_parts(index_divorce)
query_engine_cp1, hyde_cp1, hyde_query_engine3 = _hyde_engine_parts(index_consumer_protection)
query_engine_inheritance1, hyde_inheritance1, hyde_query_engine4 = _hyde_engine_parts(index_inheritance)

In [None]:
# Work on copies so the frames read from the workbook stay untouched when
# result columns are added to the copies.
df_copyright1, df_divorce1, df_cp1, df_inheritance1 = (
    frame.copy() for frame in (df_copyright, df_divorce, df_cp, df_inheritance)
)

In [None]:
# Preview the first rows of the copyright evaluation set before scoring.
df_copyright1.head()

Unnamed: 0,Query,Context,Reference
0,"What is the primary purpose of copyright, acco...",It is a bundle of rights comprising of rights ...,The primary purpose of copyright is to ensure ...
1,What is the form that needs to be filled out t...,"To apply for copyright, application for regist...",Form IV needs to be filled out to apply for co...
2,What is the typical duration of copyright prot...,Copyright protection typically lasts for 60 ye...,Typical duration of copyright duration is 60 y...
3,What is the minimum punishment for copyright i...,Remedy for copyright infringement\r\nCopyright...,The minimum punishment for infringement of a c...
4,What is the minimum rank of a police officer w...,"Also, in case a copyright infringement has hap...",The minimum rank of a police officer who can s...


In [None]:
# Run every copyright question through the HyDE query engine and score the
# answer with the four evaluators.
queries = df_copyright1["Query"].tolist()
reference_responses = df_copyright1['Reference'].tolist()
responses = []
relevancy_scores = []
faithfulness_scores = []
semantic_similarity_scores = []
correctness_scores = []

# Walk questions and their reference answers in lockstep.
for query, reference in zip(queries, reference_responses):
  response = hyde_query_engine1.query(query)

  # Relevancy and faithfulness receive the full response object.
  relevancy_score = relevancy_evaluator.evaluate_response(query=query, response=response)
  faithfulness_score = faithfulness_evaluator.evaluate_response(response=response)

  # Similarity compares the answer text with the reference answer.
  similarity_score = similarity_evaluator.evaluate(response=response.response, reference=reference)

  # The reference answer must be passed by keyword: the third positional
  # parameter of `evaluate` is `contexts`, which the correctness evaluator
  # discards, so a positional reference never reaches the grading prompt.
  correctness_score = correctness_evaluator.evaluate(
      query=query, response=response.response, reference=reference
  )

  # Pass/fail flags for relevancy and faithfulness, numeric scores otherwise.
  relevancy_scores.append(relevancy_score.passing)
  faithfulness_scores.append(faithfulness_score.passing)
  semantic_similarity_scores.append(similarity_score.score)
  correctness_scores.append(correctness_score.score)
  responses.append(response.response)




In [None]:
# Attach the generated answers and the four evaluation metrics to the
# copyright frame as new columns, then preview the result.
result_columns = {
    'LLM_response': responses,
    'Relevancy Score': relevancy_scores,
    'Faithfulness Score': faithfulness_scores,
    'Semantic Similarity Score': semantic_similarity_scores,
    'Correctness Score': correctness_scores,
}
for column_name, column_values in result_columns.items():
  df_copyright1[column_name] = column_values
df_copyright1.head()

Unnamed: 0,Query,Context,Reference,LLM_response,Relevancy Score,Faithfulness Score,Semantic Similarity Score,Correctness Score
0,"What is the primary purpose of copyright, acco...",It is a bundle of rights comprising of rights ...,The primary purpose of copyright is to ensure ...,The primary purpose of copyright is a right to...,True,True,0.890192,4.0
1,What is the form that needs to be filled out t...,"To apply for copyright, application for regist...",Form IV needs to be filled out to apply for co...,Form IV.,True,False,0.725553,4.0
2,What is the typical duration of copyright prot...,Copyright protection typically lasts for 60 ye...,Typical duration of copyright duration is 60 y...,60 years.,True,True,0.787101,3.0
3,What is the minimum punishment for copyright i...,Remedy for copyright infringement\r\nCopyright...,The minimum punishment for infringement of a c...,Imprisonment for six months with a minimum fin...,True,True,0.895927,4.0
4,What is the minimum rank of a police officer w...,"Also, in case a copyright infringement has hap...",The minimum rank of a police officer who can s...,A sub-inspector.,True,False,0.70444,4.0


In [None]:
# Save the scored copyright results to an Excel file; index=False leaves the
# row index out of the sheet.
df_copyright1.to_excel("/content/df_copyright_hyde.xlsx", index = False)

In [None]:
# Preview the first rows of the divorce evaluation set before scoring.
df_divorce1.head()

Unnamed: 0,Query,Context,Reference
0,What type of divorce is considered by the cour...,Divorce with Mutual Consent\r\nWhen the couple...,Divorce with Mutual consent
1,What is the minimum duration of separation req...,"Section 10A of Indian Divorce Act, 1869, requi...",The minimum duration of sepration required for...
2,What are the three aspects that couples need t...,maintenance and property rights need to be agr...,The three aspects that couples needto reach a ...
3,What types of expenses will be considered when...,"Custody of child, alimony to wife and litigati...",Expenses considered while issuing a decree of ...
4,What is one circumstance that may favor the sp...,the failing health or a medical condition of o...,Failing health or a medical condition of the s...


In [None]:
# Run every divorce question through the HyDE query engine and score the
# answer with the four evaluators.
queries = df_divorce1["Query"].tolist()
reference_responses = df_divorce1['Reference'].tolist()
responses = []
relevancy_scores = []
faithfulness_scores = []
semantic_similarity_scores = []
correctness_scores = []

# Walk questions and their reference answers in lockstep.
for query, reference in zip(queries, reference_responses):
  response = hyde_query_engine2.query(query)

  # Relevancy and faithfulness receive the full response object.
  relevancy_score = relevancy_evaluator.evaluate_response(query=query, response=response)
  faithfulness_score = faithfulness_evaluator.evaluate_response(response=response)

  # Similarity compares the answer text with the reference answer.
  similarity_score = similarity_evaluator.evaluate(response=response.response, reference=reference)

  # The reference answer must be passed by keyword: the third positional
  # parameter of `evaluate` is `contexts`, which the correctness evaluator
  # discards, so a positional reference never reaches the grading prompt.
  correctness_score = correctness_evaluator.evaluate(
      query=query, response=response.response, reference=reference
  )

  # Pass/fail flags for relevancy and faithfulness, numeric scores otherwise.
  relevancy_scores.append(relevancy_score.passing)
  faithfulness_scores.append(faithfulness_score.passing)
  semantic_similarity_scores.append(similarity_score.score)
  correctness_scores.append(correctness_score.score)
  responses.append(response.response)

In [None]:
# Attach the generated answers and the four evaluation metrics to the
# divorce frame as new columns, then preview the result.
result_columns = {
    'LLM_response': responses,
    'Relevancy Score': relevancy_scores,
    'Faithfulness Score': faithfulness_scores,
    'Semantic Similarity Score': semantic_similarity_scores,
    'Correctness Score': correctness_scores,
}
for column_name, column_values in result_columns.items():
  df_divorce1[column_name] = column_values
df_divorce1.head()

Unnamed: 0,Query,Context,Reference,LLM_response,Relevancy Score,Faithfulness Score,Semantic Similarity Score,Correctness Score
0,What type of divorce is considered by the cour...,Divorce with Mutual Consent\r\nWhen the couple...,Divorce with Mutual consent,A divorce with mutual consent.,True,True,0.955422,4.0
1,What is the minimum duration of separation req...,"Section 10A of Indian Divorce Act, 1869, requi...",The minimum duration of sepration required for...,Two years.,False,False,0.718859,3.0
2,What are the three aspects that couples need t...,maintenance and property rights need to be agr...,The three aspects that couples needto reach a ...,"Alimony or maintenance issues, child custody, ...",True,False,0.79149,4.0
3,What types of expenses will be considered when...,"Custody of child, alimony to wife and litigati...",Expenses considered while issuing a decree of ...,"Custody of child, alimony to wife, and litigat...",True,True,0.924057,3.0
4,What is one circumstance that may favor the sp...,the failing health or a medical condition of o...,Failing health or a medical condition of the s...,The failing health or a medical condition of o...,True,False,0.868931,4.0


In [None]:
# Save the scored divorce results to an Excel file; index=False leaves the
# row index out of the sheet.
df_divorce1.to_excel("/content/df_divorce_hyde.xlsx", index = False)

In [None]:
# Preview the first rows of the consumer-protection evaluation set before scoring.
df_cp1.head()

Unnamed: 0,Query,Context,Reference
0,What is the main law that provides protection ...,\ufeffConsumer Protection Laws in India\r\nCon...,"Consumer Protection Amendement Act, 2002 is th..."
1,What is the primary objective of the Consumer ...,"In this article, we look at the protection aff...",The primary objective of the Consumer Protecti...
2,What is the procedure to file a complaint in c...,Procedure to File Consumer Case\r\nAny consume...,Any consumer complaint relating to a good or s...
3,What is the possible outcome if the complainan...,"Hence, if during the proceedings, the complain...",If the complainant fails to appear on the date...
4,What kind of goods and services does consumer ...,The Act covers all goods and services includin...,The Consumer protection Act covers all goods a...


In [None]:
# Run every consumer-protection question through the HyDE query engine and
# score the answer with the four evaluators.
queries = df_cp1["Query"].tolist()
reference_responses = df_cp1['Reference'].tolist()
responses = []
relevancy_scores = []
faithfulness_scores = []
semantic_similarity_scores = []
correctness_scores = []

# Walk questions and their reference answers in lockstep.
for query, reference in zip(queries, reference_responses):
  response = hyde_query_engine3.query(query)

  # Relevancy and faithfulness receive the full response object.
  relevancy_score = relevancy_evaluator.evaluate_response(query=query, response=response)
  faithfulness_score = faithfulness_evaluator.evaluate_response(response=response)

  # Similarity compares the answer text with the reference answer.
  similarity_score = similarity_evaluator.evaluate(response=response.response, reference=reference)

  # The reference answer must be passed by keyword: the third positional
  # parameter of `evaluate` is `contexts`, which the correctness evaluator
  # discards, so a positional reference never reaches the grading prompt.
  correctness_score = correctness_evaluator.evaluate(
      query=query, response=response.response, reference=reference
  )

  # Pass/fail flags for relevancy and faithfulness, numeric scores otherwise.
  relevancy_scores.append(relevancy_score.passing)
  faithfulness_scores.append(faithfulness_score.passing)
  semantic_similarity_scores.append(similarity_score.score)
  correctness_scores.append(correctness_score.score)
  responses.append(response.response)

In [None]:
# Attach the generated answers and the four evaluation metrics to the
# consumer-protection frame as new columns, then preview the result.
result_columns = {
    'LLM_response': responses,
    'Relevancy Score': relevancy_scores,
    'Faithfulness Score': faithfulness_scores,
    'Semantic Similarity Score': semantic_similarity_scores,
    'Correctness Score': correctness_scores,
}
for column_name, column_values in result_columns.items():
  df_cp1[column_name] = column_values
df_cp1.head()

Unnamed: 0,Query,Context,Reference,LLM_response,Relevancy Score,Faithfulness Score,Semantic Similarity Score,Correctness Score
0,What is the main law that provides protection ...,\ufeffConsumer Protection Laws in India\r\nCon...,"Consumer Protection Amendement Act, 2002 is th...",The main law that provides protection to consu...,True,True,0.926599,4.0
1,What is the primary objective of the Consumer ...,"In this article, we look at the protection aff...",The primary objective of the Consumer Protecti...,To provide better protection of consumers and ...,True,True,0.92281,4.0
2,What is the procedure to file a complaint in c...,Procedure to File Consumer Case\r\nAny consume...,Any consumer complaint relating to a good or s...,Any consumer complaint relating to a good or s...,True,False,0.825472,4.0
3,What is the possible outcome if the complainan...,"Hence, if during the proceedings, the complain...",If the complainant fails to appear on the date...,The District Forum may either dismiss the comp...,True,False,0.901145,4.0
4,What kind of goods and services does consumer ...,The Act covers all goods and services includin...,The Consumer protection Act covers all goods a...,"All goods and services, including banking, e-c...",True,True,0.874324,4.0


In [None]:
# Save the scored consumer-protection results to an Excel file; index=False
# leaves the row index out of the sheet.
df_cp1.to_excel("/content/df_cp_hyde.xlsx", index = False)

In [None]:
# Preview the first rows of the inheritance evaluation set before scoring.
df_inheritance1.head()

Unnamed: 0,Query,Context,Reference
0,What is the legal document that contains speci...,What is a will?\r\nUnder the Indian Succession...,Will is the legal document that contains speci...
1,"What is a legal heir, according to the given d...",Therefore a legal heir is an individual who ta...,a legal heir is an individual who takes the pl...
2,Who is the legal heir of a female Hindu who di...,Mother of the mother\r\n* Father of the mother...,"The daughters and the sons, including the chil..."
3,What was the change in the right share in ance...,"Rights of Daughters\r\nBefore the year 2005, t...","Before the year 2005, the right share in the a..."
4,What is the minimum information required to ap...,Documents required for legal heir certificate\...,The minimum information required to apply for ...


In [None]:
# Run every inheritance question through the HyDE query engine and score the
# answer with the four evaluators.
queries = df_inheritance1["Query"].tolist()
reference_responses = df_inheritance1['Reference'].tolist()
responses = []
relevancy_scores = []
faithfulness_scores = []
semantic_similarity_scores = []
correctness_scores = []

# Walk questions and their reference answers in lockstep.
for query, reference in zip(queries, reference_responses):
  response = hyde_query_engine4.query(query)

  # Relevancy and faithfulness receive the full response object.
  relevancy_score = relevancy_evaluator.evaluate_response(query=query, response=response)
  faithfulness_score = faithfulness_evaluator.evaluate_response(response=response)

  # Similarity compares the answer text with the reference answer.
  similarity_score = similarity_evaluator.evaluate(response=response.response, reference=reference)

  # The reference answer must be passed by keyword: the third positional
  # parameter of `evaluate` is `contexts`, which the correctness evaluator
  # discards, so a positional reference never reaches the grading prompt.
  correctness_score = correctness_evaluator.evaluate(
      query=query, response=response.response, reference=reference
  )

  # Pass/fail flags for relevancy and faithfulness, numeric scores otherwise.
  relevancy_scores.append(relevancy_score.passing)
  faithfulness_scores.append(faithfulness_score.passing)
  semantic_similarity_scores.append(similarity_score.score)
  correctness_scores.append(correctness_score.score)
  responses.append(response.response)

In [None]:
# Attach the generated answers and the four evaluation metrics to the
# inheritance frame as new columns, then preview the result.
result_columns = {
    'LLM_response': responses,
    'Relevancy Score': relevancy_scores,
    'Faithfulness Score': faithfulness_scores,
    'Semantic Similarity Score': semantic_similarity_scores,
    'Correctness Score': correctness_scores,
}
for column_name, column_values in result_columns.items():
  df_inheritance1[column_name] = column_values
df_inheritance1.head()

Unnamed: 0,Query,Context,Reference,LLM_response,Relevancy Score,Faithfulness Score,Semantic Similarity Score,Correctness Score
0,What is the legal document that contains speci...,What is a will?\r\nUnder the Indian Succession...,Will is the legal document that contains speci...,A declaration.,True,False,0.621223,3.0
1,"What is a legal heir, according to the given d...",Therefore a legal heir is an individual who ta...,a legal heir is an individual who takes the pl...,An individual who takes the place of the prope...,True,True,0.912004,4.0
2,Who is the legal heir of a female Hindu who di...,Mother of the mother\r\n* Father of the mother...,"The daughters and the sons, including the chil...",The daughters and sons (including the children...,True,True,0.814912,4.0
3,What was the change in the right share in ance...,"Rights of Daughters\r\nBefore the year 2005, t...","Before the year 2005, the right share in the a...",Only unmarried daughters had the right share i...,True,True,0.795928,4.0
4,What is the minimum information required to ap...,Documents required for legal heir certificate\...,The minimum information required to apply for ...,The minimum information required to apply for ...,True,True,0.977945,4.0


In [None]:
# Save the scored inheritance results to an Excel file; index=False leaves the
# row index out of the sheet.
df_inheritance1.to_excel("/content/df_inheritance_hyde.xlsx", index = False)