In [None]:
import os
from langchain_community.document_loaders import PyPDFLoader,PyMuPDFLoader,TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pathlib import Path

In [None]:
def process_all_pdfs(pdf_directory):
    """Load every PDF found (recursively) under ``pdf_directory`` as page documents.

    Each PDF is read with ``PyPDFLoader``; every document it returns is tagged
    with ``source_file`` (the PDF's file name) and ``file_type`` (``'pdf'``).
    A PDF that fails to load is reported and skipped, so one bad file does not
    abort the whole run.

    Args:
        pdf_directory: Directory (str or path-like) searched with ``**/*.pdf``.

    Returns:
        A flat list with the documents of all readable PDFs, in sorted path
        order. The list is empty when no PDF is found.
    """
    all_documents = []
    pdf_dir = Path(pdf_directory)
    # glob() yields files in filesystem order, which differs between machines;
    # sorting keeps document order (and everything derived from it) reproducible.
    pdf_files = sorted(pdf_dir.glob("**/*.pdf"))
    print(f"Found {len(pdf_files)} PDF files to process")
    failed_files = []
    for pdf_file in pdf_files:
        print(f"\nProcessing: {pdf_file.name}")
        try:
            loader = PyPDFLoader(str(pdf_file))
            documents = loader.load()
            for doc in documents:
                doc.metadata['source_file'] = pdf_file.name
                doc.metadata['file_type'] = 'pdf'
            all_documents.extend(documents)
            print(f"Loaded {len(documents)} pages")
        except Exception as e:
            # Best effort: keep going, but say which file failed.
            failed_files.append(pdf_file)
            print(f"Error loading {pdf_file}: {e}")
    print(f"\nTotal documents loaded: {len(all_documents)}")
    if failed_files:
        print(f"Skipped {len(failed_files)} unreadable PDF file(s)")
    return all_documents

# Load every PDF under ../data/pdf (path is relative to the notebook's working directory).
all_pdf_documents = process_all_pdfs("../data/pdf")

In [None]:
# Display the loaded page documents (renders the whole list as cell output).
all_pdf_documents

In [None]:
import re
from langchain_community.document_loaders import PyPDFLoader
#from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_all_acts():
    """Load the two act PDFs page by page and label every page with its act.

    Reads ``pharmacy.pdf`` and then ``immunization.pdf`` from ``../data/pdf``
    with ``PyPDFLoader`` and sets ``act_name`` and ``source_file`` in each
    page's metadata.

    Returns:
        A list of page documents: all pharmacy pages first, then all
        immunization pages.
    """
    # (pdf path, act_name metadata, source_file metadata) -- adjust paths if needed
    act_sources = (
        ("../data/pdf/pharmacy.pdf", "Pharmacy Act (Nepali)", "pharmacy.pdf"),
        ("../data/pdf/immunization.pdf", "Immunization Act (Nepali)", "immunization.pdf"),
    )

    docs = []
    for pdf_path, act_name, source_file in act_sources:
        pages = PyPDFLoader(pdf_path).load()
        for page in pages:
            page.metadata["act_name"] = act_name
            page.metadata["source_file"] = source_file
        docs += pages

    print(f"Loaded {len(docs)} documents (pages) from PDFs")
    return docs

# Text splitting into chunks + adding section_number metadata
def split_documents(documents, chunk_size=800, chunk_overlap=150):
    """Split documents into overlapping chunks and tag chunks that mention a section.

    Splitting uses ``RecursiveCharacterTextSplitter`` with separators tried in
    this order: a newline followed by "धारा", blank line, newline, the danda
    followed by a space, and finally a single space.

    Args:
        documents: Documents to split (anything the splitter's
            ``split_documents`` accepts).
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared by neighbouring chunks.

    Returns:
        The list of chunks. A chunk whose text contains "धारा" followed by
        Devanagari or ASCII digits gets ``metadata["section_number"]`` set to
        the first such number (kept as written, not converted).
    """
    separators = ["\nधारा", "\n\n", "\n", "। ", " "]
    splitter = RecursiveCharacterTextSplitter(
        separators=separators,
        length_function=len,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(documents)

    # Only the first "धारा <number>" occurrence in a chunk is recorded.
    section_pattern = re.compile(r"धारा\s*([०१२३४५६७८९0-9]+)")
    for chunk in chunks:
        found = section_pattern.search(chunk.page_content)
        if found:
            chunk.metadata["section_number"] = found.group(1)

    print(f"Split {len(documents)} documents into {len(chunks)} chunks")
    if chunks:
        sample = chunks[0]
        print("\nExample chunk:")
        print(f"Content: {sample.page_content[:200]}...")
        print(f"Metadata: {sample.metadata}")

    return chunks

In [None]:
# Load the two act PDFs, then split their pages into overlapping chunks.
raw_docs = load_all_acts()
chunked_docs = split_documents(raw_docs)

In [None]:
import re
import unicodedata

def clean_nepali_text(text: str) -> str:
    """Reduce PDF-extracted text to Devanagari, digits and basic punctuation.

    Steps: NFC-normalise, blank out known private-use bullet glyphs, replace
    every character outside the whitelist with a space, then collapse runs of
    whitespace and trim.

    Zero-width joiner/non-joiner (U+200D / U+200C) are kept: they sit inside
    Devanagari words to select conjunct forms, so turning them into spaces
    would split a word such as "गर्‍यो" in two.

    Args:
        text: Raw text, e.g. a chunk's ``page_content``.

    Returns:
        The cleaned, single-spaced text ("" for empty input).
    """
    text = unicodedata.normalize("NFC", text)

    # Private-use code points that some PDFs emit for bullet symbols.
    for ch in ("\uf0a7", "\uf0b7", "\uf0d8", "\uf0e5", "\uf022"):
        text = text.replace(ch, " ")

    # Whitelist: Devanagari block (covers the danda and Devanagari digits),
    # ZWNJ/ZWJ, whitespace, ASCII digits and a little punctuation.
    text = re.sub(r"[^\u0900-\u097F\u200c\u200d\s।,;:?!०-९0-9\-–]", " ", text)

    # Collapse whitespace left behind by the replacements.
    text = re.sub(r"\s+", " ", text).strip()
    return text


# Clean every chunk's text in place.
# NOTE(review): later cells rebuild `chunked_docs` from the PDFs without calling
# clean_nepali_text again, so this cleaning does not carry over to them.
for d in chunked_docs:
    d.page_content = clean_nepali_text(d.page_content)

In [None]:
# Chunk everything that process_all_pdfs loaded (all PDFs under ../data/pdf),
# then display the resulting chunk list.
chunks=split_documents(all_pdf_documents)
chunks

In [None]:
# Bare reference to the imported TextLoader class: the cell only displays it and loads nothing.
TextLoader

In [None]:
#embedding and vectorstore db
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List,Dict,Any,Tuple,Optional
from sklearn.metrics.pairwise import cosine_similarity
import torch

In [None]:



class EmbeddingManager:
    """Wraps a SentenceTransformer model and encodes lists of texts in batches."""

    def __init__(
        self,
        model_name: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
        device: Optional[str] = None,
        normalize_embeddings: bool = True,
        batch_size: int = 32,
    ):
        """Store the settings and load the model immediately.

        Args:
            model_name: sentence-transformers model ID.
            device: "cuda" or "cpu"; when falsy, "cuda" is chosen if
                ``torch.cuda.is_available()`` and "cpu" otherwise.
            normalize_embeddings: Passed to ``encode``; if True the model
                L2-normalises the vectors (suits cosine similarity).
            batch_size: Number of texts encoded per batch.
        """
        if device:
            chosen_device = device
        elif torch.cuda.is_available():
            chosen_device = "cuda"
        else:
            chosen_device = "cpu"

        self.model_name = model_name
        self.device = chosen_device
        self.normalize_embeddings = normalize_embeddings
        self.batch_size = batch_size
        self.model = None  # set by load_model()
        self.load_model()

    def load_model(self):
        """Instantiate the model on ``self.device``; log and re-raise any failure."""
        try:
            print(f"Loading embedding model: {self.model_name} on {self.device}")
            self.model = SentenceTransformer(self.model_name, device=self.device)
            embedding_dim = self.model.get_sentence_embedding_dimension()
            print(f"Model loaded successfully. Embedding dimension: {embedding_dim}")
        except Exception as e:
            print(f"Error loading model {self.model_name}: {e}")
            raise

    def generate_embeddings(self, texts: List[str]) -> np.ndarray:
        """Encode ``texts`` and return whatever array the model's ``encode`` produces.

        Raises:
            ValueError: If no model is loaded.
        """
        if self.model is None:
            raise ValueError("Model not loaded")

        print(f"Generating embeddings for {len(texts)} texts...")
        encode_options = {
            "batch_size": self.batch_size,
            "show_progress_bar": True,
            "convert_to_numpy": True,
            "normalize_embeddings": self.normalize_embeddings,
        }
        vectors = self.model.encode(texts, **encode_options)
        print(f"Generated embeddings with shape: {vectors.shape}")
        return vectors


# Create the notebook-wide embedding manager (the model is loaded in the constructor),
# then display the instance.
embedding_manager = EmbeddingManager()
embedding_manager

In [None]:
import os
import uuid
from typing import Any, List

import chromadb
import numpy as np


class VectorStore:
    """Persistent ChromaDB collection holding chunk texts, metadata and embeddings.

    The collection is created with ``hnsw:space = cosine``. Document ids are
    derived from each chunk's source, page and text, and writes use ``upsert``,
    so ingesting the same chunks again (e.g. re-running a notebook cell)
    overwrites the existing rows instead of piling up duplicates.
    """

    def __init__(
        self,
        collection_name: str = "pdf_documents_v2",  # new name to avoid mixing old embeddings
        persist_directory: str = "../data/vector_store",
        reset: bool = False,  # if True, delete existing collection on init
    ):
        """Open (or create) the collection.

        Args:
            collection_name: Name of the Chroma collection.
            persist_directory: Directory for the on-disk store (created if missing).
            reset: If True, delete an existing collection of that name first.
        """
        self.collection_name = collection_name
        self.persist_directory = persist_directory
        self.client = None
        self.collection = None
        self.reset = reset
        self._initialize_store()

    def _initialize_store(self):
        """Create the persistent client and collection; log and re-raise failures."""
        try:
            os.makedirs(self.persist_directory, exist_ok=True)
            self.client = chromadb.PersistentClient(path=self.persist_directory)

            # Optionally drop the old collection (when rebuilding from scratch).
            if self.reset:
                try:
                    self.client.delete_collection(self.collection_name)
                    print(f"Deleted existing collection: {self.collection_name}")
                except Exception as e:
                    # Best effort: usually the collection just does not exist yet.
                    print(f"No collection deleted ({self.collection_name}): {e}")

            # Cosine distance, matching the normalised embeddings.
            self.collection = self.client.get_or_create_collection(
                name=self.collection_name,
                metadata={
                    "description": "PDF document embeddings for RAG (Nepali law)",
                    "hnsw:space": "cosine",  # important if you want cosine similarity
                },
            )
            print(f"Vector store initialized. Collection: {self.collection_name}")
            print(f"Existing documents in collection: {self.collection.count()}")
        except Exception as e:
            print(f"Error initializing vector store: {e}")
            raise

    @staticmethod
    def _make_doc_id(metadata: dict, content: str) -> str:
        """Return a deterministic id built from the chunk's source, page and text."""
        source = metadata.get("source_file", metadata.get("source", ""))
        key = "\x1f".join([str(source), str(metadata.get("page", "")), content])
        return f"doc_{uuid.uuid5(uuid.NAMESPACE_OID, key).hex}"

    def add_documents(self, documents: List[Any], embeddings: np.ndarray):
        """Write documents and their embeddings to the collection (idempotent).

        Args:
            documents: Objects with ``page_content`` and ``metadata`` attributes
                (missing attributes are treated as "" / {}).
            embeddings: One embedding row per document, same order.

        Raises:
            ValueError: If the two inputs differ in length.
            Exception: Whatever the collection raises on write (logged, re-raised).
        """
        if len(documents) != len(embeddings):
            raise ValueError("Number of documents must match number of embeddings")
        if len(documents) == 0:
            # Nothing to write; avoids sending empty lists to the collection.
            print("No documents to add; vector store unchanged")
            return
        print(f"Adding {len(documents)} documents to vector store...")

        ids = []
        metadatas = []
        documents_text = []
        embeddings_list = []
        seen_ids = set()

        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            content = getattr(doc, "page_content", "")
            metadata = dict(getattr(doc, "metadata", None) or {})

            doc_id = self._make_doc_id(metadata, content)
            if doc_id in seen_ids:
                # Same chunk repeated inside this batch: ids within one write
                # must be unique, and the row would be identical anyway.
                continue
            seen_ids.add(doc_id)
            ids.append(doc_id)

            metadata["doc_index"] = i
            metadata["content_length"] = len(content)
            metadatas.append(metadata)

            documents_text.append(content)
            embeddings_list.append(embedding.tolist())

        try:
            # upsert (not add): rows with an existing id are overwritten.
            self.collection.upsert(
                ids=ids,
                embeddings=embeddings_list,
                metadatas=metadatas,
                documents=documents_text,
            )
            print(f"Successfully stored {len(ids)} unique documents in vector store")
            print(f"Total documents in collection: {self.collection.count()}")
        except Exception as e:
            print(f"Error adding documents to vector store: {e}")
            raise


# Create the notebook-wide vector store and display it.
# NOTE: reset=True deletes the existing collection every time this cell runs;
# switch to reset=False once the index is built and should be kept.
vectorstore = VectorStore(reset=True)
vectorstore

In [None]:
# End-to-end ingestion of the two acts: load -> chunk -> embed -> store.
# NOTE(review): the chunks are embedded as extracted; clean_nepali_text is not applied here.

# 1) Load your acts from PDFs
raw_docs = load_all_acts()          # uses PyPDFLoader for pharmacy.pdf + immunization.pdf

# 2) Split into chunks
chunked_docs = split_documents(raw_docs)

# 3) Embed chunks with your embedding model (could be nepali_legal_model)
texts = [d.page_content for d in chunked_docs]
embeddings = embedding_manager.generate_embeddings(texts)

# 4) Add to the vector store
vectorstore.add_documents(chunked_docs, embeddings)

In [None]:
# Display the chunks built from all PDFs (full list as cell output).
chunks

In [None]:
# Collect the raw text of every chunk in `chunks` and display it.
# NOTE: this rebinds the global `texts`, which other cells also assign.
texts=[doc.page_content for doc in chunks]
texts



In [None]:
# Repeats the act ingestion (load -> chunk -> embed -> store) from the earlier cell.
# NOTE(review): running both cells stores the same act chunks twice unless the
# collection is reset in between.

# 1) Load acts from PDFs
raw_docs = load_all_acts()          
chunked_docs = split_documents(raw_docs)

# 2) Embed chunks with the new EmbeddingManager
texts = [d.page_content for d in chunked_docs]
embeddings = embedding_manager.generate_embeddings(texts)

# 3) Add to the new vector store
vectorstore.add_documents(chunked_docs, embeddings)

In [None]:
# Build the retriever over the shared vector store and embedding manager.
# NOTE(review): RAGRetriever is defined in a later cell, so on a fresh kernel this
# cell raises NameError unless that cell has been run first.
rag_retriever = RAGRetriever(vectorstore, embedding_manager)

In [None]:
# Embed and store the chunks built from all PDFs.
# The texts are taken from `chunks` right here: the global `texts` is rebound by
# other cells to the texts of `chunked_docs`, and embedding that list would pair
# each document in `chunks` with a vector computed from different text.
chunk_texts = [doc.page_content for doc in chunks]
embeddings = embedding_manager.generate_embeddings(chunk_texts)
vectorstore.add_documents(chunks, embeddings)

In [None]:
# Smoke test: fetch the 3 nearest chunks for a pharmacy-act question (no `where` filter).
results = rag_retriever.retrieve(
    "नेपाल फार्मेसी परिषद् ऐन, २०५७ अनुसार फार्मेसी व्यावसाय गनन के–के व्यवस्था छन्?",
    top_k=3,
)

# Show rank, distance, origin and the first 400 characters of each hit.
for doc in results:
    print(f"\nRank {doc['rank']}, distance={doc['distance']:.4f}")
    print("Source:", doc["metadata"].get("source_file"), "page:", doc["metadata"].get("page"))
    print(doc["content"][:400])
    print("-" * 80)

In [None]:
# Ask the full RAG pipeline a Romanized-Nepali question and print the answer.
# NOTE(review): `rag_simple` and `llm` are defined in later cells; on a fresh
# kernel those cells must be run before this one.
answer = rag_simple("pharmacy council le ke vanxa yo ainma?", rag_retriever, llm)
print(answer)

In [None]:
import re

def contains_devanagari(text: str) -> bool:
    """Tell whether ``text`` has at least one character in U+0900..U+097F."""
    return any("\u0900" <= ch <= "\u097F" for ch in text)


def normalize_to_nepali(query: str, llm) -> str:
    """Convert a Romanized-Nepali query to Devanagari with the LLM.

    A query that already contains Devanagari is returned unchanged without
    calling the LLM. Otherwise the LLM is asked (Nepali prompt) to convert
    Romanized Nepali and to echo anything else (e.g. plain English) as-is.

    NOTE: a later cell defines ``normalize_to_nepali`` again with an English
    prompt; after running the whole notebook that later definition is the one
    in effect.

    Args:
        query: The user's question.
        llm: Object with ``invoke(prompt)`` returning something with ``.content``.

    Returns:
        The converted sentence with one pair of surrounding quotes removed, or
        the original ``query`` when the LLM returns nothing usable.
    """
    if contains_devanagari(query):
        return query

    prompt = f"""तलको इनपुट रोमन नेपाली (Latin script मा लेखिएको नेपाली) पनि हुन सक्छ,
वा अरू भाषा (जस्तै English) पनि हुन सक्छ।

तपाईंको काम:
- यदि इनपुट स्पष्ट रूपमा रोमन नेपाली छ भने, त्यसलाई सही नेपाली (देवनागरी) मा रूपान्तरण गर्नुहोस्।
- यदि इनपुट रोमन नेपाली होइन (जस्तै pure English प्रश्न) छ भने, त्यसलाई जस्ताको तस्तै फर्काउनुहोस्।
- कुनै पनि व्याख्या, अगाडि/पछि extra शब्दहरू नलेख्नुहोस्।
- केवल रूपान्तरण गरिएको वा original वाक्य मात्र आउटपुट गर्नुहोस्।

केही उदाहरण:
- "immunization act kaile aayeko ?" -> "खोप ऐन, २०७२ कहिले आएको हो ?"
- "pharmacy council le ke vanxa?" -> "फार्मेसी काउन्सिलले के भन्छ ?"
- "yo ain kahile lagu bhayo?" -> "यो ऐन कहिले लागू भयो ?"

प्रयोगकर्ताको इनपुट:
{query}

आउटपुट (केवल वाक्य):
"""
    resp = llm.invoke(prompt)
    normalized = (resp.content or "").strip()

    # The prompt's examples are quoted, so the model often answers in quotes;
    # remove one matching pair so the quotes do not end up in the search query.
    quote_chars = "\"'“”"
    if len(normalized) >= 2 and normalized[0] in quote_chars and normalized[-1] in quote_chars:
        normalized = normalized[1:-1].strip()

    # An empty reply must not become an empty search query.
    return normalized or query


def choose_where(query: str, norm_query: str) -> dict | None:
    """Decide which PDF to search based on keywords."""
    text = (query + " " + norm_query).lower()
    # Pharmacy related
    if any(word in text for word in ["pharmacy", "pharmasi", "फार्मेसी"]):
        return {"source_file": "pharmacy.pdf"}
    # Immunization / खोप related
    if any(word in text for word in ["immunization", "khop", "खोप", "इम्युनाइजेशन"]):
        return {"source_file": "immunization.pdf"}
    return None


def rag_simple(query: str, retriever, llm, top_k: int = 6) -> str:
    """Answer ``query`` from retrieved act chunks (normalize -> retrieve -> prompt).

    NOTE: a later cell defines ``rag_simple`` again with a different prompt;
    after running the whole notebook that later definition is the one in effect.

    Args:
        query: User question (Devanagari, Romanized Nepali or English).
        retriever: Object with ``retrieve(query, top_k=..., where=...)``
            returning a list of dicts that have a ``"content"`` key.
        llm: Object with ``invoke(prompt)`` returning something with ``.content``.
        top_k: Number of chunks to request.

    Returns:
        The LLM's answer, or a fixed Nepali message when nothing is retrieved.
    """
    # 0) Normalize Roman Nepali -> Devanagari
    norm_query = normalize_to_nepali(query, llm)
    print("Original query:", query)
    print("Normalized query:", norm_query)

    where = choose_where(query, norm_query)
    print("Using where filter:", where)

    # 1) First try: retrieve with normalized Nepali
    results = retriever.retrieve(norm_query, top_k=top_k, where=where)

    # 1b) Fallback: if no results, try original query
    if not results:
        print("No results with normalized query, trying original query...")
        results = retriever.retrieve(query, top_k=top_k, where=where)

    if not results:
        return "सहित सन्दर्भ (context) फेला परेन, त्यसैले म जवाफ दिन सक्दिन।"

    # 2) Build context in rank order, stopping at the first chunk that does not fit.
    max_chars = 3500
    context_parts = []
    current_len = 0
    for doc in results:
        text = doc["content"]
        if current_len + len(text) > max_chars:
            # If even the top-ranked chunk is too long, keep its beginning:
            # otherwise the LLM would be asked to answer from an empty context.
            if not context_parts:
                context_parts.append(text[:max_chars])
            break
        context_parts.append(text)
        current_len += len(text)
    context = "\n\n".join(context_parts)

    # 3) Prompt for answer
    prompt = f"""तपाईं नेपाली कानुन बुझ्ने कानुनी सहायक हुनुहुन्छ। तल दिइएको सन्दर्भ 
फार्मेसी र खोप सम्बन्धी नेपाली कानूनबाट लिइएको हो।

नियम:
- केवल सन्दर्भमा स्पष्ट रूपमा लेखिएको आधारमा मात्र जवाफ दिनुहोस्।
- यदि सोधिएको कुरा सन्दर्भमा स्पष्ट रूपमा छैन भने, जवाफमा लेख्नुहोस्:
  "मलाई थाहा छैन। यो जानकारी दिइएको सन्दर्भमा छैन।"
- आफ्नै अनुमान नगर्नुहोस्, अन्य सामान्य ज्ञान प्रयोग नगर्नुहोस्।
- सकेसम्म दफा नम्बर, परिच्छेद वा शीर्षकको नाम उल्लेख गर्नुहोस्।
- जवाफ छोटो तर ठोस, कानुनी रूपमा ठीक र नेपाली भाषामा दिनुहोस्।

सन्दर्भ:
{context}

प्रश्न (प्रयोगकर्ताको मूल इनपुट):
{query}

अन्तर्रूप (normalize) गरिएको प्रश्न:
{norm_query}

जवाफ नेपाली भाषामा:
"""

    response = llm.invoke(prompt)
    return response.content.strip()

In [None]:
# Rebuild the retriever (same arguments as before).
# NOTE(review): RAGRetriever is defined in a later cell; run that cell first on a fresh kernel.
rag_retriever = RAGRetriever(vectorstore, embedding_manager)

In [None]:
# Retrieve with the defaults (top_k=5, no `where` filter) and display the raw hit dicts.
rag_retriever.retrieve("नेपालमा फार्मेसी सञ्चालन गर्न कानुनले के कस्ता प्रावधानहरू राखेको छ?")

In [None]:
# Sanity check: how many vectors the collection currently holds.
print("Number of vectors in collection:", vectorstore.collection.count())

In [None]:
# Inspect a few stored items (peek(3)) to confirm the collection holds the expected text.
peek = vectorstore.collection.peek(3)
print("Keys:", peek.keys())
print("Sample documents:\n")

# Print only the start of each stored document to keep the output short.
for i, doc in enumerate(peek.get("documents", [])):
    print(f"Item {i}:")
    print(doc[:400])  # first 400 characters
    print("-" * 80)

In [None]:
# Smoke test: fetch the 3 nearest chunks for an immunization-act question.
results = rag_retriever.retrieve(
    "यस खोप ऐन, २०७२ अनुसार खोप कार्यक्रम के हो?",
    top_k=3,
)

# retrieve() returns a list of hit dicts; report how many came back.
print("Type of results:", type(results))
print("Number of docs:", len(results))

# Show rank, distance and the first 400 characters of each hit.
for doc in results:
    print(f"\nRank: {doc['rank']}, distance: {doc['distance']:.4f}")
    print(doc['content'][:400])
    print("-" * 80)

In [None]:
from typing import Any, Dict, List, Optional

class RAGRetriever:
    """Embeds a query and looks up the nearest chunks in the vector store."""

    def __init__(self, vector_store: VectorStore, embedding_manager: EmbeddingManager):
        """Keep references to the store to query and the embedder for queries."""
        self.vector_store = vector_store
        self.embedding_manager = embedding_manager

    def retrieve(
        self,
        query: str,
        top_k: int = 5,
        where: Optional[dict] = None,  # optional metadata filter passed to the collection
    ) -> List[Dict[str, Any]]:
        """Return up to ``top_k`` hits for ``query``, best match first.

        Args:
            query: Text to search for.
            top_k: Number of results requested from the collection.
            where: Metadata filter forwarded unchanged to ``collection.query``.

        Returns:
            A list of dicts with keys ``id``, ``content``, ``metadata``,
            ``distance`` (smaller = more similar) and ``rank`` (1-based).
            Errors raised by the collection query are printed and whatever was
            collected so far (normally ``[]``) is returned; an error while
            embedding the query is not caught.
        """
        print(f"Retrieving documents for query: '{query}'")
        print(f"Top k: {top_k}, where: {where}")

        # Embed the query (outside the try: embedding errors propagate).
        query_vector = self.embedding_manager.generate_embeddings([query])[0]

        hits: List[Dict[str, Any]] = []
        try:
            raw = self.vector_store.collection.query(
                query_embeddings=[query_vector.tolist()],
                n_results=top_k,
                where=where,
            )

            if raw and raw.get("documents") and raw["documents"][0]:
                # One query was sent, so each field holds a single inner list.
                contents = raw["documents"][0]
                metas = raw["metadatas"][0]
                dists = raw["distances"][0]
                doc_ids = raw["ids"][0]

                rows = zip(doc_ids, contents, metas, dists)
                for rank, (doc_id, content, meta, dist) in enumerate(rows, start=1):
                    hits.append({
                        "id": doc_id,
                        "content": content,
                        "metadata": meta,
                        "distance": dist,  # smaller = more similar
                        "rank": rank,
                    })

                print(f"Retrieved {len(hits)} documents")
            else:
                print("No documents found")

        except Exception as e:
            print(f"Error during retrieval: {e}")

        # Always a list, even after an error.
        return hits

In [None]:
# Build the retriever now that RAGRetriever is defined (cell above).
rag_retriever = RAGRetriever(vectorstore, embedding_manager)

In [None]:
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv
# Read environment variables from a local .env file (if present).
load_dotenv()


# Fail fast when the key is missing OR empty: an empty GROQ_API_KEY= entry would
# otherwise pass a None check and only fail later as an authentication error.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise ValueError("GROQ_API_KEY not set in environment or .env file")

# Notebook-wide chat model used for query normalization and answering.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="llama-3.1-8b-instant",
    temperature=0.1,
    max_tokens=1024,
)
def rag_simple(query: str, retriever, llm, top_k: int = 6) -> str:
    """Answer ``query`` from retrieved act chunks as a short, itemised Nepali reply.

    Pipeline: normalize the query to Devanagari, pick an optional per-PDF
    filter, retrieve chunks, build a bounded context and ask the LLM.

    Args:
        query: User question (Devanagari, Romanized Nepali or English).
        retriever: Object with ``retrieve(query, top_k=..., where=...)``
            returning a list of dicts that have a ``"content"`` key.
        llm: Object with ``invoke(prompt)`` returning something with ``.content``.
        top_k: Number of chunks to request.

    Returns:
        The LLM's answer, or a fixed Nepali message when nothing is retrieved.
    """
    # 0) Normalize (Roman -> Devanagari)
    norm_query = normalize_to_nepali(query, llm)
    print("Normalized query:", norm_query)

    # Decide which file to search
    where = choose_where(query, norm_query)
    print("Using where filter:", where)

    # 1) Retrieve
    results = retriever.retrieve(norm_query, top_k=top_k, where=where)

    if not results:
        return "सहित सन्दर्भ (context) फेला परेन, त्यसैले म जवाफ दिन सक्दिन।"

    # 2) Build context in rank order, stopping at the first chunk that does not fit.
    max_chars = 3500
    context_parts = []
    current_len = 0
    for doc in results:
        text = doc["content"]
        if current_len + len(text) > max_chars:
            # If even the top-ranked chunk is too long, keep its beginning:
            # otherwise the LLM would be asked to answer from an empty context.
            if not context_parts:
                context_parts.append(text[:max_chars])
            break
        context_parts.append(text)
        current_len += len(text)
    context = "\n\n".join(context_parts)

    # 3) Prompt
    prompt = f"""तपाईं नेपाली कानुन बुझ्ने कानुनी सहायक हुनुहुन्छ। तल दिइएको सन्दर्भ 
फार्मेसी र खोप सम्बन्धी नेपाली कानूनबाट लिइएको हो।

नियम:
- सोधिएको प्रश्नको सीधा जवाफ दिनुहोस्।
- मुख्य शर्तहरू वा प्रावधानहरू बुँदागत (१, २, ३...) रूपमा सारांश दिनुहोस्।
- अनावश्यक रूपमा एउटै वाक्य/वाक्यांश दोहोर्याउनु हुँदैन।
- सन्दर्भबाट ठ्याक्कै उतार्नु परे पनि, छोटो अंश मात्र उतार्नुहोस्, लामो वाक्य वा अनुच्छेद नदोहोऱ्याउनुहोस्।
- सकेसम्म ऐनको नाम र धारा/परिच्छेद नम्बर (metadata मा भए) उल्लेख गर्नुहोस्।
- यदि सोधिएको कुरा सन्दर्भमा स्पष्ट रूपमा छैन भने, जवाफमा लेख्नुहोस्:
  "मलाई थाहा छैन। यो जानकारी दिइएको सन्दर्भमा छैन।"
- आफ्नो अनुमान नगर्नुहोस्, अन्य सामान्य ज्ञान प्रयोग नगर्नुहोस्।

सन्दर्भ:
{context}

प्रश्न (प्रयोगकर्ताको मूल इनपुट):
{query}

अन्तर्रूप (normalize) गरिएको प्रश्न:
{norm_query}

जवाफ नेपाली भाषामा, छोटो र बुँदागत रूपमा:
"""

    response = llm.invoke(prompt)
    return response.content.strip()


In [None]:
def choose_where(query: str, norm_query: str) -> dict | None:
    text = (query + " " + norm_query).lower()
    # Pharmacy‑related
    if any(word in text for word in ["pharmacy", "pharmasi", "फार्मेसी"]):
        return {"source_file": "pharmacy.pdf"}
    # Immunization / खोप‑related
    if any(word in text for word in ["immunization", "khop", "खोप", "इम्युनाइजेशन"]):
        return {"source_file": "immunization.pdf"}
    # Otherwise search all docs
    return None

In [None]:
from dotenv import load_dotenv
import os

# Debug cell: confirm that GROQ_API_KEY is visible after loading .env.
load_dotenv()

key = os.getenv("GROQ_API_KEY")
# Only presence, length and the first 4 characters are printed, never the full key.
# NOTE(review): clear this cell's output before sharing the notebook.
print("Key present:", key is not None)
print("Key length:", len(key) if key else None)
print("Key starts with:", key[:4] if key else None)

In [None]:
from dotenv import load_dotenv
import os

# Debug cell: check the key that the llm init will use WITHOUT printing it.
# Printing repr(key) would store the full secret in the notebook's saved output.
load_dotenv()

groq_api_key = os.getenv("GROQ_API_KEY")
print("Key present for llm init:", bool(groq_api_key))
# Catches a key copied with stray spaces or quotes, which is what a repr() dump would reveal.
print(
    "Has surrounding whitespace/quotes:",
    (groq_api_key != groq_api_key.strip().strip("\"'")) if groq_api_key else None,
)
# Only the first 4 characters are shown, enough to check the key's prefix.
print("Starts with:", groq_api_key[:4] if groq_api_key else None)
print("Length:", len(groq_api_key) if groq_api_key else None)

In [None]:
from groq import Groq
import os

# Debug cell: list the model ids available to this API key (network call to Groq).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

models = client.models.list()
for m in models.data:
    print(m.id)

In [None]:
import re

def contains_devanagari(text: str) -> bool:
    """Report whether any character of ``text`` lies in the Devanagari block (U+0900-U+097F)."""
    first_match = re.compile(r'[\u0900-\u097F]').search(text)
    return first_match is not None

In [None]:
def normalize_to_nepali(query: str, llm) -> str:
    """Transliterate a Romanized-Nepali query into Devanagari with the LLM.

    A query that already contains Devanagari is returned unchanged without
    calling the LLM.

    Args:
        query: The user's question.
        llm: Object with ``invoke(prompt)`` returning something with ``.content``.

    Returns:
        The transliterated sentence with one pair of surrounding quotes
        removed, or the original ``query`` when the LLM returns nothing usable.
    """
    # If it already has Devanagari, don't touch it
    if contains_devanagari(query):
        return query

    prompt = f"""You are a transliteration engine, not a chatbot.

TASK:
- Convert Romanized Nepali written in Latin script into correct Nepali in Devanagari.
- Do NOT translate, rephrase, or change the meaning.
- Do NOT guess a different question.
- Keep all words; if you don't know how to transliterate a word, copy it as-is.
- Preserve question structure (question marks etc.).
- Output ONLY the converted sentence, no explanation.

GOOD examples (do this):
- "pharmacy ain le ke vanxa?" -> "फार्मेसी ऐनले के भन्छ ?"
- "pharmacy council le yo ainma ke vanxa?" -> "फार्मेसी काउन्सिलले यो ऐनमा के भन्छ ?"
- "immunization act kaile aayeko ?" -> "इम्युनाइजेशन ऐन कहिले आएको ?"
- "yo ain namane kehi karbahi hunxa?" -> "यो ऐन नमाने केहि कारबाही हुन्छ ?"

BAD examples (never do this):
- Changing "pharmacy ain le ke vanxa?" into 
  "फार्मेसी व्यवसाय सञ्चालन गर्न के–के शर्त चाहिन्छ?"  ✗  (WRONG: different meaning)
- Changing topic or inventing extra information.

User input:
{query}

Output (only the transliterated Nepali sentence):
"""

    resp = llm.invoke(prompt)
    normalized = (resp.content or "").strip()

    # The prompt's examples are quoted, so the model often answers in quotes;
    # remove one matching pair so the quotes do not end up in the search query.
    quote_chars = "\"'“”"
    if len(normalized) >= 2 and normalized[0] in quote_chars and normalized[-1] in quote_chars:
        normalized = normalized[1:-1].strip()

    # An empty reply must not become an empty search query.
    return normalized or query

In [None]:
import gradio as gr

def rag_chat(message, chat_history):
    """Answer ``message`` with the RAG pipeline and extend the chat history.

    Args:
        message: User input (string).
        chat_history: List of ``(user, assistant)`` pairs; it is not modified.

    Returns:
        ``("", new_history)``: an empty string (to clear the textbox) and a
        new history list ending with ``(message, answer)``.
    """
    reply = rag_simple(message, rag_retriever, llm)
    # `+` builds a new list, leaving the caller's history untouched.
    return "", chat_history + [(message, reply)]

In [None]:
def append_char_to_textbox(current_text: str, ch: str) -> str:
    """Return ``current_text`` with ``ch`` appended; ``None`` counts as empty text."""
    base_text = "" if current_text is None else current_text
    return base_text + ch

In [None]:
import gradio as gr

def rag_chat(message, history):
    """Chat callback: return the RAG pipeline's answer for ``message``.

    ``history`` is accepted as part of the callback signature but is not used.
    """
    return rag_simple(message, rag_retriever, llm)

def append_char_to_textbox(current_text: str, ch: str) -> str:
    """Append ``ch`` to the textbox value, treating a ``None`` value as ""."""
    if current_text is not None:
        return current_text + ch
    return "" + ch

with gr.Blocks() as demo:
    # Chat front end: each submitted message is answered by rag_chat.
    chat = gr.ChatInterface(
        fn=rag_chat,
        title="AI Lawyer – Nepali Health Law Assistant",
        description=(
            "फार्मेसी परिषद् ऐन, २०५७ र खोप ऐन, २०७२ को आधारमा कानुनी जानकारीका लागि।\n\n"
            "> **Disclaimer:** यो केवल सूचना/शैक्षिक उद्देश्यको AI सहायक हो। "
            "यो आधिकारिक कानुनी सल्लाह होइन। दर्ता भएका वकिलसँग परामर्श गर्नुहोस्।"
        ),
        examples=[
            ["फार्मेसी व्यवसाय सञ्चालन गर्न के–के शर्त चाहिन्छ?"],
            ["खोप ऐन, २०७२ कहिले लागू भयो?"],
            ["pharmacy council le yo ainma ke vanxa?"],
        ],
    )

    # On-screen Nepali keyboard shown under the chat.
    gr.Markdown("**नेपाली किबोर्ड (क्लिक गरेर टाइप गर्नुहोस्):**")

    # Letter rows: the button label is the character it types.
    row1 = ["क", "ख", "ग", "घ", "ङ", "च", "छ", "ज", "झ", "ञ"]
    row2 = ["ट", "ठ", "ड", "ढ", "ण", "त", "थ", "द", "ध", "न"]
    row3 = ["प", "फ", "ब", "भ", "म", "य", "र", "ल", "व"]
    row4 = ["श", "ष", "स", "ह", "ा", "ि", "ी", "ु", "ू", "ृ"]
    # (button label, character typed)
    special_keys = [
        ("Space", " "),
        ("?", "?"),
        ("।", "।"),
        (",", ","),
    ]

    def make_append_fn(ch):
        """Build a click handler bound to ``ch`` at creation time (not to the loop variable)."""
        def fn(current_text):
            return append_char_to_textbox(current_text, ch)
        return fn

    # All rows as (label, character) pairs, rendered top to bottom.
    keyboard_rows = [[(key, key) for key in row] for row in (row1, row2, row3, row4)]
    keyboard_rows.append(special_keys)

    for keys in keyboard_rows:
        with gr.Row():
            for label, ch in keys:
                btn = gr.Button(label)
                # Read the chat textbox, append the character, write it back.
                btn.click(make_append_fn(ch), inputs=chat.textbox, outputs=chat.textbox)

demo.launch()