In [2]:
!pip install langchain
!pip install pypdf
!pip install sentence-transformers
!pip install transformers
!pip install langchain_community

Collecting langchain
  Downloading langchain-0.2.14-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.3.0,>=0.2.32 (from langchain)
  Downloading langchain_core-0.2.34-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.104-py3-none-any.whl.metadata (13 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.32->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting httpx<1,>=0.23.0 (from langsmith<0.2.0,>=0.1.17->langchain)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain)
  Downloading orjson-3.10.7-cp310-cp31

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
loader = PyPDFLoader("/content/constitution-of-india.pdf")
pages = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=150
)
splits = text_splitter.split_documents(pages)

print(f"Number of document splits: {len(splits)}")

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
document_embeddings = model.encode([doc.page_content for doc in splits])

# Using GPT-Neo for Text Generation
generator = pipeline('text-generation', model='EleutherAI/gpt-neo-1.3B')

# Simple Question-Answering Function
def simple_qa_chain(question, context):
    prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"
    response = generator(prompt, max_new_tokens=150, truncation=True)
    return response[0]['generated_text']

# Improved Conversational QA Chain with Document Search
def conversational_qa_chain(question, embeddings, documents):
    # Find the most relevant document segment
    question_embedding = model.encode([question])
    similarities = cosine_similarity(question_embedding, embeddings)
    most_relevant_index = similarities.argmax()

    context = documents[most_relevant_index].page_content

    # Generate answer based on context
    prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"
    response = generator(prompt, max_new_tokens=250, truncation=True)

    return response[0]['generated_text']

# Chatbot Interface
def chatbot_interface():
    # Print heading
    print("\n" + "="*50)
    print(" " * 10 + "Welcome to the Constitution of India Chatbot!")
    print("="*50 + "\n")

    print("Type 'exit' to end the chat.")

    while True:
        question = input("You: ")
        if question.lower() == 'exit':
            print("Chatbot: Goodbye!")
            break

        # Get the answer using the conversational QA chain
        answer = conversational_qa_chain(question, document_embeddings, splits)
        print(f"Chatbot: {answer}")

# Run the chatbot interface
chatbot_interface()



  from tqdm.autonotebook import tqdm, trange


Number of document splits: 768


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]


          Welcome to the Constitution of India Chatbot!

Type 'exit' to end the chat.
You: What are the laws of POCSO in India?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Chatbot: Context: THE CONSTITUTION OF  INDIA
(Seventh Schedule)  317
List II—State List
1. Public order (but not including 1[the use of any naval, military or air 
force or any other armed force of the Union or of any other force subject to the 
control of the Union or of any contingent or unit thereof] in aid of the civil 
power).
2[2. Police (including railway and village police) subject to the provisions 
of entry 2A of List I.]
3. 3*** Officers and servants of the High Court; procedure in rent and 
revenue courts; fees taken in all courts except the Supreme Court.
4. Prisons, reformatories, Borstal institutions and other institutions of a 
like nature, and persons detained therein; arrangements with other States for the 
use of prisons and other institutions. 
5. Local government, that is to say, the constitution and powers of 
municipal corporations, improvement trusts, districts boards, mining settlement 
authorities and other local authorities for the purpose of local self-gover

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Chatbot: Context: THE CONSTITUTION OF  INDIA
(Part XIVA. —Tribunals)185
1[(j)] any matter incidental to any of the matters specified in 
sub-clauses ( a) to 2[(i)]. 
(3) A law made under clause (1) may—
(a) provide for the establishment of a hierarchy of tribunals;
(b) specify the jurisdiction, powers (including the power to punish for 
contempt) and authority which may be exercised by each of the said 
tribunals;
(c) provide for the procedure (including provisions as to limitation 
and rules of evidence) to be followed by the said tribunals;
(d) exclude the jurisdiction of all courts, except the jurisdiction of the 
Supreme Court under article 136, with respect to all or any of the matters 
falling within the jurisdiction of the said tribunals; 
(e) provide for the transfer to each such tribunal of any cases pending 
before any court or any other authority immediately before the 
establishment of such tribunal as would have been within the jurisdiction 
of such tribunal if the causes 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Chatbot: Context: be citizens. —No person shall be a citizen of India by virtue of article 5, or be 
deemed to be a citizen of India by virtue of article 6 or article 8, if he has 
voluntarily acquired the citizenship of any foreign State.
10. Continuance of the rights of citizenship. —Every person who is or 
is deemed to be a citizen of India under any of the foregoing provisions of this 
Part shall, subject to the provisions of any law that may be made by Parliament, 
continue to be such citizen.
11. Parliament to regulate the right of citizenship by law. —Nothing 
in the foregoing provisions of this Part shall derogate from the power of 
Parliament to make any provision with respect to the acquisition and 
termination of citizenship and all other matters relating to citizenship.

Question: Basic rights of citizen in India ?
Answer: If the question is whether the basic rights of a citizen of India are 
protected by Citizenship Rules, the answer is No. The provisions of the 
citizen's

In [7]:
def chatbot_interface():
    # Print heading
    print("\n" + "="*50)
    print(" " * 10 + "Welcome to the Constitution of India Chatbot!")
    print("="*50 + "\n")

    print("Type 'exit' to end the chat.")

    while True:
        question = input("You: ")
        if question.lower() == 'exit':
            print("Chatbot: Goodbye!")
            break

        answer = conversational_qa_chain(question, document_embeddings, splits)
        print(f"Chatbot: {answer}")

chatbot_interface()



          Welcome to the Constitution of India Chatbot!

Type 'exit' to end the chat.
You: What are the laws applied to a rape case in India ?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Chatbot: Context: THE CONSTITUTION OF  INDIA
(Part XIVA. —Tribunals)185
1[(j)] any matter incidental to any of the matters specified in 
sub-clauses ( a) to 2[(i)]. 
(3) A law made under clause (1) may—
(a) provide for the establishment of a hierarchy of tribunals;
(b) specify the jurisdiction, powers (including the power to punish for 
contempt) and authority which may be exercised by each of the said 
tribunals;
(c) provide for the procedure (including provisions as to limitation 
and rules of evidence) to be followed by the said tribunals;
(d) exclude the jurisdiction of all courts, except the jurisdiction of the 
Supreme Court under article 136, with respect to all or any of the matters 
falling within the jurisdiction of the said tribunals; 
(e) provide for the transfer to each such tribunal of any cases pending 
before any court or any other authority immediately before the 
establishment of such tribunal as would have been within the jurisdiction 
of such tribunal if the causes 