In [1]:
import os
import chromadb
from sentence_transformers import SentenceTransformer

# Identifiers for the vector collection and the sentence-embedding model.
COLLECTION_NAME = "supreme_court_judgements"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"  # swap for a legal-domain-specific model if one is available

# Vector store: open the client, then fetch the collection (created on first use).
chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection(name=COLLECTION_NAME)

# Embedding model used below to vectorise both the documents and the queries.
embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import json
import uuid

# Folder holding the extracted judgment JSON files. The JUDGMENTS_DIR environment
# variable overrides the default so the notebook is not tied to a single machine.
processed_folder = os.environ.get(
    "JUDGMENTS_DIR",
    r"C:\Users\ASUS\Documents\ITProfound\dev\Backend\pdfs\Extracted",
)

# NOTE(review): each judgment is embedded as one single text. The all-MiniLM-L6-v2
# model card states that long inputs are truncated, so presumably only the opening
# of each judgment influences its vector. Consider chunking; TODO confirm.

# sorted() makes the indexing order reproducible from run to run.
for file in sorted(os.listdir(processed_folder)):
    if not file.endswith(".json"):
        continue

    # Read and close the file before the (slow) embedding step.
    with open(os.path.join(processed_folder, file), "r", encoding="utf-8") as f:
        data = json.load(f)

    content = data["content"]
    if not isinstance(content, str) or not content.strip():
        # Nothing meaningful to embed; report it instead of storing an empty document.
        print(f"Skipping {file}: empty or non-text content")
        continue

    # Deterministic ID derived from the JSON file name: re-running this cell
    # replaces the existing entry instead of inserting a duplicate, which is
    # what a fresh random uuid4 caused on every run.
    doc_id = str(uuid.uuid5(uuid.NAMESPACE_URL, file))
    embedding = embedder.encode(content)

    # upsert = insert, or overwrite when the ID is already present.
    collection.upsert(
        documents=[content],
        embeddings=[embedding],
        ids=[doc_id],
        metadatas=[{"filename": data["filename"]}],
    )

In [13]:
# Free-text query, embedded with the same model that was used for the documents.
query = "Kerala"
query_embedding = embedder.encode(query)

# Ask the collection for the five closest stored documents.
results = collection.query(query_embeddings=[query_embedding], n_results=5)

# Only one query embedding was sent, so the hits live at index 0 of each result list.
top_documents = results["documents"][0]
top_metadatas = results["metadatas"][0]

for rank, (text, meta) in enumerate(zip(top_documents, top_metadatas), start=1):
    print(f"Result {rank}:\n{text[:500]}...\n")  # preview limited to the first 500 characters
    print("From file:", meta["filename"], "\n")
    print("=" * 80)


Result 1:
2025 INSC 773
 Civil Appeal  No.14915 of 2024 etc.   Page 1 of 16 NON-REPORTABLE  
 
IN THE SUPREME COURT OF INDIA  
CIVIL APPELLATE JURISDICTION  
 
CIVIL APPEAL NO.  14915  OF 2024  
 
Maya P.C. & Ors.                             … Appellant s 
    
 
 versus  
 
 
The State  of Kerala & Anr.                   … Respondent s 
  
with  
 
CIVIL APPEAL NOS.14916 -14917 OF 2024  
 
CIVIL APPEAL NO.14918 OF 2024  
 
and 
 
CIVIL APPEAL NO.14919 OF 2024  
 
      J U D G M E N T  
ABHAY S. OKA, J. ...

From file: MAYA_P.C._VS._STATE_OF_KERALA.pdf 

Result 2:
2025 INSC 809
REPORTABLE
IN THE SUPREME COURT OF INDIA
CRIMINAL APPELLATE JURISDICTION
CRIMINAL APPEAL NO.2897 OF 2025
(Arising out of SLP (Crl.) No.14740 of 2024)
 
DHANYA M                          … APPELLANT(S)
Versus
STATE OF KERALA & ORS.             … RESPONDENT(S)
J U D G M E N T
Sanjay Karol, J.
Leave Granted.
2.The present appeal arises from the final judgment and
order dated 4th September, 2024 passed by the High 

In [None]:
import gradio as gr
import chromadb
from sentence_transformers import SentenceTransformer

# Collection and model identifiers (same values as the indexing cells use).
COLLECTION_NAME = "supreme_court_judgements"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Open the Chroma client and fetch (or create) the judgments collection.
# NOTE(review): chromadb.Client() is the in-memory client per the Chroma docs, so
# this presumably only sees documents indexed earlier in the same process.
# Confirm before running this cell on its own.
chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection(name=COLLECTION_NAME)

# Embedding model for incoming search queries.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Search function
def semantic_search(query, top_k=5):
    """Return the closest judgments for ``query`` as a Markdown string.

    Args:
        query: Free-text search string. ``None`` or whitespace-only input is
            treated as "no query".
        top_k: Maximum number of results to request. Any value ``int()`` can
            convert is accepted (e.g. a float handed over by a UI slider);
            values below 1 are raised to 1.

    Returns:
        Markdown text with one section per hit (source file name followed by
        the first 1000 characters of the document), the prompt
        ``"Please enter a search query."`` for an empty query, or
        ``"No results found."`` when the collection returns no hits.
    """
    # A UI component may pass None rather than "", so guard before .strip().
    if query is None or not str(query).strip():
        return "Please enter a search query."

    # Normalise to a positive int before it is used as n_results.
    n_results = max(1, int(top_k))

    embedding = model.encode(query)
    results = collection.query(query_embeddings=[embedding], n_results=n_results)

    # One query embedding was sent, so the hits are at index 0 of each list.
    documents = results["documents"][0]
    metadatas = results["metadatas"][0]

    sections = []
    for rank, (doc, meta) in enumerate(zip(documents, metadatas), start=1):
        # Tolerate entries stored without metadata or without a file name.
        filename = (meta or {}).get("filename", "unknown file")
        sections.append(
            f"### Result {rank} (from {filename}):\n"
            f"{doc[:1000].strip()}\n\n"  # limit to the first 1000 characters
            "---\n"
        )

    return "".join(sections) if sections else "No results found."

# Gradio UI
# Search page layout: a title, one row with the query box and the result-count
# slider, a search button, and a Markdown area that shows the formatted hits.
with gr.Blocks(title="Supreme Court Judgments Search") as demo:
    gr.Markdown("## 🔍 Supreme Court Judgment Semantic Search")

    with gr.Row():
        query_input = gr.Textbox(
            label="Search Query",
            placeholder="Enter your legal query here...",
            lines=1,
        )
        top_k_slider = gr.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=5,
            label="Number of Results",
        )

    search_button = gr.Button("Search")
    output_text = gr.Markdown()

    # Wire both triggers to the same handler so that clicking the button and
    # pressing Enter in the query box behave identically.
    search_inputs = [query_input, top_k_slider]
    search_button.click(semantic_search, inputs=search_inputs, outputs=output_text)
    query_input.submit(semantic_search, inputs=search_inputs, outputs=output_text)

demo.launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


