In [1]:
import chromadb
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer

# ðŸ”¹ Load Local Embedding Model
embedding_model_path = "./models/all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(embedding_model_path)

# ðŸ”¹ Load ChromaDB (Prebuilt Vector Database)
chroma_path = "./new_vector_db"
chroma_client = chromadb.PersistentClient(path=chroma_path)
collection = chroma_client.get_collection(name="ipc_sections")

# ðŸ”¹ Load Local LLM (Phi-2)
llm_path = "./models/Phi-2"  # Path where Phi-2 is saved
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(llm_path)
model = AutoModelForCausalLM.from_pretrained(llm_path).to(device)

def retrieve_ipc_sections(query, top_k=3):
    """Retrieve the most relevant IPC sections based on a query."""
    query_embedding = embedding_model.encode(query).tolist()

    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k
    )

    if results["ids"]:
        retrieved_texts = [
            f"Section {meta['section']}: {meta['description']}"
            for meta in results["metadatas"][0]
        ]
        return retrieved_texts
    return ["No relevant IPC section found."]

def generate_response(user_query):
    """Generate a response using retrieved IPC sections and local LLM."""
    retrieved_sections = retrieve_ipc_sections(user_query)

    # ðŸ”¹ Format retrieved sections as input for the LLM
    context = "\n".join(retrieved_sections)
    prompt = f"""You are a legal assistant. Based on the Indian Penal Code, analyze the following scenario:

    User Query: {user_query}
    
    Relevant IPC Sections:
    {context}
    
    Provide an explanation of how these sections apply to the given case.
    """

    # ðŸ”¹ Tokenize and Generate Response
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)
    output_tokens = model.generate(**inputs, max_length=2048, temperature=0.7, top_p=0.9)
    
    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    return response

# ðŸ”¹ Example Usage
user_input = "I robbed someone's house"
response = generate_response(user_input)
print(response)





Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Add of existing embedding ID: IPC_140
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


You are a legal assistant. Based on the Indian Penal Code, analyze the following scenario:

    User Query: I robbed someone's house
    
    Relevant IPC Sections:
    Section IPC_457: Description of IPC Section 457
According to section 457 of Indian penal code, Whoever commits lurking house-trespass by night, or house-breaking by night in order to the committing of any offence punishable with imprisonment, shall be punished with imprisonment of either description for a term which may extend to five years, and shall also be liable to fine;
and, if the offence intended to be committed is theft, the term of the imprisonment may be extended to fourteen years.


IPC 457 in Simple Words
If someone commits lurking house-trespass or house-breaking at night with the intention of committing any punishable offense, they can be punished with imprisonment for up to five years and may also be fined; if the intended offense is theft, the imprisonment term can be extended to fourteen years.

Cited b