In [None]:
# Install required libraries
!pip install llama-index
!pip install PyMuPDF  # for extracting text from PDFs
!pip install langchain
!pip install openai  # required by llamaindex if you're using OpenAI embeddings
!pip install sentence-transformers  # to use other embeddings like SBERT
!pip install faiss-cpu  # for similarity search




In [None]:
import os
import fitz  # PyMuPDF
from llama_index.core import Document
from sentence_transformers import SentenceTransformer

# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        One string holding the text of all pages, with no separator added
        between pages.
    """
    # The context manager closes the document even if a page fails to extract,
    # so the underlying file handle is always released.
    with fitz.open(pdf_path) as doc:
        # join() builds the result once instead of re-copying a growing string
        # for every page.
        return "".join(page.get_text("text") for page in doc)

# Function to clean text (you can modify this as per your needs)
def clean_text(text):
    """Flatten text onto one line.

    Every newline becomes a single space, then leading and trailing
    whitespace is trimmed. Runs of spaces inside the text are left as they are.
    """
    single_line = " ".join(text.split("\n"))
    return single_line.strip()

# Function to load all PDFs in a folder and extract text
def extract_texts_from_folder(folder_path):
    """Extract and clean the text of every PDF directly inside a folder.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A list with one cleaned text string per PDF, ordered by file name.
    """
    texts = []
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # reproducible from run to run.
    for filename in sorted(os.listdir(folder_path)):
        # Compare case-insensitively so files named "*.PDF" are not skipped.
        if not filename.lower().endswith('.pdf'):
            continue
        file_path = os.path.join(folder_path, filename)
        texts.append(clean_text(extract_text_from_pdf(file_path)))
    return texts


In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Root of the LawBot corpus in Google Drive. Uses the 'MyDrive' mount name so
# that it agrees with the paths used by the indexing cells later in the notebook.
pdf_directory = '/content/drive/MyDrive/LawBot/'

# One sub-folder of PDFs per legal topic, keyed by topic name.
folders = {
    topic: os.path.join(pdf_directory, topic)
    for topic in ("murder", "theft", "land", "divorce")
}


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
from getpass import getpass

# SECURITY: never store an API key in a notebook. The key that used to be
# hardcoded in this cell has been exposed and must be revoked and rotated.
# Use a key already present in the environment; otherwise prompt for it
# without echoing, so it ends up neither in the source nor in the cell output.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('Enter your OpenAI API key: ')

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Define the folder paths for each topic
folders = {
    "murder": '/content/drive/MyDrive/LawBot/murder',
    "theft": '/content/drive/MyDrive/LawBot/theft',
    "land": '/content/drive/MyDrive/LawBot/land',
    "divorce": '/content/drive/MyDrive/LawBot/divorce'
}

# Extract the cleaned PDF texts of each topic folder: {topic: [text, ...]}
topic_texts = {
    topic: extract_texts_from_folder(folder)
    for topic, folder in folders.items()
}

# Convert the extracted texts into LlamaIndex Document objects.
# Document must be given its text by keyword; a positional argument is rejected.
documents = [
    Document(text=text)
    for texts in topic_texts.values()
    for text in texts
]

# Initialize the index using VectorStoreIndex
# NOTE(review): presumably this embeds via the OpenAI key set in an earlier cell — confirm.
index = VectorStoreIndex.from_documents(documents)

# Save the index to disk (optional)
index.storage_context.persist('vector_index')


In [None]:
import os
import re
import openai
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import time
from openai import RateLimitError

# Local sentence-embedding model shared by the FAISS indexing and
# semantic-search helpers defined in this cell.
EMBED_MODEL_NAME = 'all-MiniLM-L6-v2'
embed_model = SentenceTransformer(EMBED_MODEL_NAME)

def load_documents(directory_path):
    """Load the text of every file found one level below ``directory_path``.

    Each immediate sub-folder is scanned (not recursively). PDF files are
    passed through ``extract_text_from_pdf`` and ``clean_text``; any other
    file is read as UTF-8 text with undecodable bytes ignored.

    Args:
        directory_path: Root directory whose sub-folders hold the corpus.

    Returns:
        A list of document strings, ordered by folder name and then file name.
        Files that fail to load are reported with ``print`` and skipped.
    """
    all_documents = []
    # Sorted listings keep document order (and therefore FAISS row ids)
    # stable between runs; os.listdir order is arbitrary.
    for folder in sorted(os.listdir(directory_path)):
        folder_path = os.path.join(directory_path, folder)
        if not os.path.isdir(folder_path):
            continue
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            # Nested directories cannot be opened as files; skip them quietly.
            if not os.path.isfile(file_path):
                continue
            try:
                if file_name.lower().endswith('.pdf'):
                    # A PDF is a binary container: decoding its raw bytes as
                    # UTF-8 yields garbage, so use the PDF text extractor.
                    text = clean_text(extract_text_from_pdf(file_path))
                else:
                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
                        text = file.read()
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"Error reading file {file_name}: {e}")
                continue
            all_documents.append(text)
    return all_documents

def create_faiss_index(documents):
    """Embed the documents and build a flat L2 FAISS index over the vectors.

    Args:
        documents: Sequence of text strings to embed with ``embed_model``.

    Returns:
        A tuple ``(index, embeddings)``: the populated ``faiss.IndexFlatL2``
        and the float32 embedding matrix, one row per document.

    Raises:
        ValueError: If ``documents`` is empty, since the index dimension is
            taken from the embeddings and cannot be determined without any.
    """
    documents = list(documents)
    if not documents:
        raise ValueError("create_faiss_index requires at least one document")
    # FAISS expects a C-contiguous float32 matrix.
    embeddings = np.ascontiguousarray(embed_model.encode(documents), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings

def keyword_search(query, documents):
    """Return the documents that contain ``query`` as a whole phrase.

    Matching is case-insensitive and literal (the query is regex-escaped).
    The phrase must not be directly preceded or followed by a word character,
    so "murder" does not match inside "murderer".

    Args:
        query: Phrase to look for.
        documents: Iterable of text strings to scan.

    Returns:
        The matching documents, in their original order. An empty query
        matches nothing.
    """
    if not query:
        return []
    # Lookarounds instead of \b: \b fails when the query itself starts or ends
    # with punctuation (e.g. "163(1)" followed by a space).
    # Compiled once rather than once per document.
    pattern = re.compile(r'(?<!\w)' + re.escape(query) + r'(?!\w)', re.IGNORECASE)
    return [doc for doc in documents if pattern.search(doc)]

def semantic_search(query, index, documents, top_k=5):
    """Return the documents whose embeddings are nearest to the query.

    Args:
        query: Text to embed with ``embed_model``.
        index: Index whose row ids correspond to positions in ``documents``
            and which exposes ``search(vectors, k)``.
        documents: The texts the index was built from.
        top_k: Maximum number of documents to return.

    Returns:
        Up to ``top_k`` documents in the order the index returned them.
    """
    if not documents or top_k <= 0:
        return []
    # Never ask for more neighbours than there are documents.
    k = min(top_k, len(documents))
    query_embedding = np.asarray(embed_model.encode([query]), dtype=np.float32)
    _, neighbor_ids = index.search(query_embedding, k)
    # FAISS pads missing neighbours with -1; indexing a list with -1 would
    # silently return the last document, so drop out-of-range ids.
    return [documents[i] for i in neighbor_ids[0] if 0 <= i < len(documents)]

def generate_response(context, query, max_retries=3, retry_delay=5,
                      model="gpt-3.5-turbo", max_response_tokens=150):
    """Ask the chat model to answer ``query`` using ``context``.

    Args:
        context: Retrieved text placed in the prompt as the relevant document.
        query: The user's question.
        max_retries: Total number of attempts when the API reports a rate
            limit. Values below 1 are treated as a single attempt.
        retry_delay: Seconds to wait between attempts.
        model: Chat model name passed to the API.
        max_response_tokens: Cap on the length of the generated answer. The
            default of 150 can cut an answer off mid-sentence; raise it for
            longer answers.

    Returns:
        The text content of the first choice in the response.

    Raises:
        RateLimitError: If every attempt was rate limited.
    """
    # Always make at least one attempt so the function never falls through
    # and returns None.
    attempts = max(1, max_retries)
    for attempt in range(attempts):
        try:
            response = openai.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": f"Here is a relevant document: {context}. Based on this, please answer the following query: {query}"}
                ],
                max_tokens=max_response_tokens
            )
            return response.choices[0].message.content
        except RateLimitError:
            if attempt == attempts - 1:
                # Out of attempts: re-raise with the original traceback.
                raise
            print(f"Rate limit exceeded. Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

def combined_search(query, documents, index):
    """Merge keyword and semantic hits into one de-duplicated, ordered result.

    Keyword matches come first, followed by semantic matches that were not
    already included; each group keeps the order its search returned.

    Args:
        query: Search text.
        documents: Corpus of text strings.
        index: Vector index passed through to ``semantic_search``.

    Returns:
        A list of unique documents. A list rather than a set is returned so
        that the ranking survives when a caller keeps only the first few.
    """
    keyword_results = keyword_search(query, documents)
    semantic_results = semantic_search(query, index, documents)
    # dict.fromkeys removes duplicates while preserving first-seen order; a
    # set union would leave the order (and any later truncation) arbitrary.
    return list(dict.fromkeys([*keyword_results, *semantic_results]))

def chunk_documents(docs, max_chunk_size=500):
    """Split each document into chunks of at most ``max_chunk_size`` words.

    Words are whitespace-separated tokens; each chunk is its words re-joined
    with single spaces. Chunks of all documents are returned in one flat list,
    in document order.
    """
    pieces = []
    for document in docs:
        tokens = document.split()
        starts = range(0, len(tokens), max_chunk_size)
        pieces.extend(
            " ".join(tokens[start:start + max_chunk_size]) for start in starts
        )
    return pieces

def rag_pipeline(query, documents, index, max_docs=5, max_chunk_size=300, max_tokens=4000):
    """Retrieve context for ``query`` and ask the LLM to answer from it.

    Args:
        query: The user's question.
        documents: Corpus of text strings.
        index: Vector index over ``documents``.
        max_docs: Maximum number of retrieved documents to use.
        max_chunk_size: Words per chunk when splitting retrieved documents.
        max_tokens: Upper bound on the context length. Despite the name it is
            measured in characters, not model tokens.

    Returns:
        Whatever ``generate_response`` returns for the assembled context.
    """
    # Step 1: Retrieve relevant documents based on both keyword and semantic search
    retrieved_documents = list(combined_search(query, documents, index))[:max_docs]

    # Step 2: Break the documents into chunks
    chunked_documents = chunk_documents(retrieved_documents, max_chunk_size)

    # Step 3: Concatenate all chunks into one large context
    full_context = " ".join(chunked_documents)

    # Step 4: Keep the context within the character budget
    if len(full_context) > max_tokens:
        clipped = full_context[:max_tokens]
        # If the cut landed inside a word, drop the partial word so the model
        # is not shown a truncated token at the end of the context.
        if not full_context[max_tokens].isspace() and " " in clipped:
            clipped = clipped[:clipped.rfind(" ")]
        full_context = clipped

    # Step 5: Query the LLM with the concatenated context
    return generate_response(full_context, query)

# Read every file under the LawBot corpus root into a list of strings
LAWBOT_DIR = '/content/drive/MyDrive/LawBot'
documents = load_documents(LAWBOT_DIR)

# Embed the loaded texts and index the vectors with FAISS.
# NOTE(review): `documents` and `index` rebind names that the earlier
# LlamaIndex cell also assigns; from here on they refer to the FAISS corpus.
index, embeddings = create_faiss_index(documents)

# Test the modified RAG system with a query
case_briefing = "Calcutta High Court: A petition was filed by Joint Platform for Doctors (petitioners) an umbrella association of doctors and citizens challenging an order dated 14-10-2024 issued by the Commissioner of Police under Section 163(1) and (3) of the Bhartiya Nagarik Suraksha Sanhita (BNSS), 2023 prohibiting any unlawful assembly of five or more persons in Kolkata on the grounds that such assemblies could breach public peace and tranquility during the “Immersion Carnival” (visarjan) organized by the State. Ravi Krishan Kapur, J., permitted the doctors to hold the “Droher Carnival- Doctors and Citizens Assembly” and directed the State to put barricades and/or guard rails between Rani Rashmoni Avenue and Red Road to ensure that no breach of peace occurs. The Court also directed “The police shall also make necessary arrangements for security with a required number of police personnel to ensure that there is no breach of peace at the proposed “Droher Carnival”. The organizers of the rally are also requested to have enough volunteers to ensure that no breach of peace occurs.” The petitioners consist of a doctors’ association, a member doctor, and a concerned citizen, all advocating for justice in response to a horrific incident, the alleged rape and murder of a trainee doctor. Their primary grievance arises from the refusal of the Kolkata Police to permit them to conduct the “Droher Carnival” on 15-10-2024, at Rani Rashmoni Road, a peaceful assembly meant to highlight the issue and demand justice. The rejection of their request was formalized in a communication from the Commissioner of Police on 13-10-2024. The Commissioner’s decision was based on concerns that the demonstration would coincide with the ongoing Pujo “Immersion Carnival,” and that it could potentially disrupt the safety and security of participants and visitors. 
Following the rejection, the Commissioner issued a blanket order prohibiting any public assembly on the same day in Kolkata, citing the need to prevent disturbances during the Pujo celebrations. Counsel for the petitioners contended that the refusal to allow the Droher Carnival and the subsequent order prohibiting assemblies violated their fundamental rights under Articles 19(1)(a) and 19(1)(b) of the Constitution, which guarantee freedom of speech and the right to assemble peacefully. They argued that the restrictions were disproportionate, arbitrary, and lacking justification under the law. The petitioners also submitted that the ban on public assemblies was excessive, as it imposed a blanket restriction that extended far beyond the legitimate purpose of maintaining public order. They contended that peaceful protests are an essential part of democracy, and restricting their right to assemble based on hypothetical fears of disturbance during a different event was unwarranted. Counsel for State argued that the timing and location of the Droher Carnival posed a potential threat to public order and safety, given its proximity to the Pujo Immersion Carnival on the same day. The State justified the refusal of permission on the grounds that holding two major public events simultaneously in nearby locations could lead to chaos and disruption. The State emphasized the need for balance between the right to protest and maintaining public order. It also submitted that the matter raised by the petitioners was already under consideration by the Supreme Court in a related case, thereby arguing that the High Court should refrain from passing any orders. The Court acknowledged the fundamental right of citizens to protest peacefully, emphasizing that such rights are protected under the Constitution and are crucial in a democratic society. 
The Court observed that while the right to protest is a cornerstone of democracy, any restriction on this right must meet the test of reasonableness and necessity in maintaining public order. The Court was critical of the blanket prohibition imposed by the impugned order, noting that the scope of the restriction was excessive and disproportionate to the purported aim of preventing a breach of peace during the Pujo Immersion Carnival. The Court found that the concerns raised by the State were speculative, as there was no evidence that holding the Droher Carnival would necessarily lead to a breakdown of law and order. Additionally, the Court rejected the State’s proposal that the petitioners could postpone their event or choose a different venue, emphasizing that such conditions imposed an unreasonable burden on the petitioners’ right to freely assemble. The Court remarked that “the failure of the impugned order to provide a substantial basis for restricting public assembly led the court to conclude that it was arbitrary and an overreach of executive power.” Thus, the Court allowed the writ petition and held that the petitioners had the right to hold the Droher Carnival on 15-10-2024 at Rani Rashmoni Road as planned and directed the State to put in place adequate security measures, including barricades and police personnel, to ensure that both the Droher Carnival and the Pujo Immersion Carnival could take place without any disturbances. The Court dismissed the State’s prayer to stay the operation of its order, directing that the order be communicated immediately to ensure compliance....Advocates who appeared in this case : Mr. Bikash Ranjan Bhattacharyya, Sr. Adv., Mr. Samim Ahammed, Mr. Tapas Maity, Mr. Siddhartha Sankar Mondal, Mr. Satwick Majumdar, Mr. Saptarshi Banerjee, Mr. Rajit Lal Maitra, Mr. Arka Maiti, Ms. Ambiya Khatoon, Mr. Saloni Bhattacharjee, Mr. Nazimuddin Siddique, … Advocates for the petitioners. Mr. Kishore Dutta, Ld. A. G., Mr. 
Amitesh Banerjee, Sr. Adv., Mr. Debangshu Dinda, Adv., … Advocates for the State...."

# Run retrieval + generation for the case briefing defined above
response = rag_pipeline(case_briefing, documents, index)

# Show the answer beneath a heading line
print("Generated Response:", response, sep="\n")


Generated Response:
Based on the document you provided regarding the petition filed by the Joint Platform for Doctors challenging the order issued by the Commissioner of Police, here is a summary of the key points:

1. The petitioners sought permission to hold the "Droher Carnival - Doctors and Citizens Assembly" in response to a horrific incident but were initially denied by the Kolkata Police.

2. The refusal was based on concerns that the demonstration could disrupt the safety and security of participants and visitors during the ongoing Pujo Immersion Carnival.

3. The Commissioner then issued a blanket order prohibiting public assemblies in Kolkata on the same day, citing the need to prevent disturbances during the Pujo celebrations.

4. The petitioners argued that the refusal and subsequent order violated their fundamental rights under Articles
