In [3]:
pip install langchain langchain-mistralai langchain_community chromadb mistralai sentence-transformers tf-keras langchain-huggingface

Collecting langchain-huggingface
  Downloading langchain_huggingface-0.0.3-py3-none-any.whl.metadata (1.2 kB)
Downloading langchain_huggingface-0.0.3-py3-none-any.whl (17 kB)
Installing collected packages: langchain-huggingface
Successfully installed langchain-huggingface-0.0.3
Note: you may need to restart the kernel to use updated packages.


In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_mistralai.embeddings import MistralAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
import os
from dotenv import load_dotenv
load_dotenv()
api_key=os.getenv("MISTRAL_API_KEY")
# Load data







In [2]:
loader = PyPDFLoader("gst.pdf")
pages = loader.load_and_split()

In [3]:
import chromadb
client = chromadb.PersistentClient(path="chromadb")
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"})
db = Chroma(client=client, embedding_function=embedding_function, collection_name="gst_laws")
       




In [13]:
db.add_documents(pages)

['ccf4f38a-65ad-409e-92d0-5bf1d6222792',
 '510b7ea0-f153-4ae1-adcd-b5a9413a6640',
 '4aedc17e-279f-43bd-aa4b-3dc7db5de0a2',
 'aab969f2-51e7-4b2d-ba95-e4771a3c0afc',
 '9c7bac70-3900-4f17-b4a2-fff1645cf611',
 'ff595013-75ab-4adb-9a87-021d2f9c0143',
 '95cb2fae-7aa8-44e7-983b-a01646081752',
 'f24e5101-a9af-4c4a-a7b7-3ff1082c1a4b',
 'd0ad87a8-3ff6-4f09-8640-f1aa05c5eaeb',
 'bf9e6726-2fb6-484b-a2c5-f5b98fd8d27d',
 '8a071b1f-5927-456d-ac9a-2ff70b58b951',
 'd2b1e887-ab1b-4c5e-9b7b-3d44f3a52432',
 '51d6ee6e-9742-4622-9e0c-f8c06bf1776a',
 '88d40baf-1d40-44cd-878b-45d76452ded6',
 'd3051ac1-12cb-48e4-8514-6796f971a950',
 '88ad96db-17aa-4191-bdde-285be6a97b46',
 'a55259d4-e3e8-4611-9898-a92c9a8c45d0',
 '18fcb295-2da1-4d37-a935-932b2f33a351',
 'd939564d-8497-477a-9f08-de67838a905b',
 'eec4aabf-530c-40b4-ba2b-489b7d87a47a',
 'c7bfbe72-b563-4f9e-a0b8-365c6ba203c5',
 '2ceaa332-40a3-4c47-980f-2b097e0b292e',
 '889bb6ad-1430-45b2-a1ae-32e4206f2be9',
 '1bb93646-8f63-4cba-af29-b5fe577d1159',
 '222f4e86-d451-

In [9]:
model = ChatMistralAI(mistral_api_key=api_key)
# Define prompt template
prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

# Create a retrieval chain to answer questions
document_chain = create_stuff_documents_chain(model, prompt)
retrieval_chain = create_retrieval_chain(db.as_retriever(search_kwargs={"k": 10}), document_chain)


In [15]:
query="who will appoint national appellate authority?"

In [16]:
response = retrieval_chain.invoke({"input": query})
print(response["answer"])

The President of the National Appellate Authority shall be appointed by the Government after consultation with the Chief Justice of India or his nominee. The Technical Member (Centre) and Technical Member (State) of the National Appellate Authority shall be appointed by the Government on the recommendations of a Selection Committee consisting of such persons and in such manner as may be prescribed.


In [11]:
%pip install FlagEmbedding

Collecting FlagEmbedding
  Downloading FlagEmbedding-1.2.10.tar.gz (141 kB)
     ---------------------------------------- 0.0/141.3 kB ? eta -:--:--
     -------- ------------------------------ 30.7/141.3 kB 1.4 MB/s eta 0:00:01
     ---------- -------------------------- 41.0/141.3 kB 393.8 kB/s eta 0:00:01
     ------------------------ ------------ 92.2/141.3 kB 655.4 kB/s eta 0:00:01
     ---------------------------- ------- 112.6/141.3 kB 656.4 kB/s eta 0:00:01
     ------------------------------------ 141.3/141.3 kB 601.4 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting datasets (from FlagEmbedding)
  Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate>=0.20.1 (from FlagEmbedding)
  Downloading accelerate-0.31.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow-hotfix (from datasets->FlagEmbedding)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)
Co

In [13]:
from FlagEmbedding import FlagReranker
reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True, device='cpu')

model.safetensors:  88%|########7 | 1.99G/2.27G [00:00<?, ?B/s]

In [17]:
results = db.similarity_search(query, k=20)

In [18]:
pairs = [[query, result.page_content] for result in results]
scores = reranker.compute_score(pairs, normalize=True)

In [20]:
sorted_results = sorted(zip(results, scores), key=lambda x: x[1], reverse=True)




In [37]:
pairs

[['who will appoint national appellate authority?',
  '75 \n (2) The National Appellate Authority shall consist of — \n(i) the President, who has been a Judge of the Supreme Court or is or has been the Chief Justice \nof a High Court, or is or has been a Judge of a High  Court for a period not less than five years;  \n(ii) a Technical Member (Centre) who is or has been a member of Indian Revenue (Customs and \nCentral Excise) Service, Group A, and has completed at least fifteen years of service in Group A;  \n(iii) a Technical Me mber (State) who is or has been an officer of the State Government not below \nthe rank of Additional Commissioner of Value Added Tax or the Additional Commissioner of State \ntax with at least three years of experience in the administration of an existing law  or the State Goods \nand Services Tax Act or in the field of finance and taxation.  \n(3) The President of the National Appellate Authority shall be appointed by the Government after \nconsultation with

In [36]:
for doc, score in sorted_results[:5]:
    print(doc)

page_content='75 \n (2) The National Appellate Authority shall consist of — \n(i) the President, who has been a Judge of the Supreme Court or is or has been the Chief Justice \nof a High Court, or is or has been a Judge of a High  Court for a period not less than five years;  \n(ii) a Technical Member (Centre) who is or has been a member of Indian Revenue (Customs and \nCentral Excise) Service, Group A, and has completed at least fifteen years of service in Group A;  \n(iii) a Technical Me mber (State) who is or has been an officer of the State Government not below \nthe rank of Additional Commissioner of Value Added Tax or the Additional Commissioner of State \ntax with at least three years of experience in the administration of an existing law  or the State Goods \nand Services Tax Act or in the field of finance and taxation.  \n(3) The President of the National Appellate Authority shall be appointed by the Government after \nconsultation with the Chief Justice of India or his nomine

In [28]:
context = "\n\n".join(sorted_results[0:5][0])

TypeError: sequence item 0: expected str instance, Document found

In [None]:
response=model.invoke(input="""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {query}""")

In [None]:
response