In [54]:
import os
from dotenv import load_dotenv

# Read key/value pairs from the local .env file into the process environment.
# Later cells fetch GROQ_API_KEY via os.getenv — presumably it is defined in that file.
load_dotenv()  # Load the .env file

True

In [55]:
from llama_index.llms.groq import Groq
from llama_index.core import VectorStoreIndex, load_index_from_storage, StorageContext

# Groq-hosted LLM used for answer generation.
# The API key comes from the environment; os.environ.get yields None when it is unset.
GROQ_MODEL_NAME = "llama3-8b-8192"
llm = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
    model=GROQ_MODEL_NAME,
)

In [56]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from chromadb import PersistentClient

# --- Embedding model -------------------------------------------------------
# Instantiated explicitly so the index is loaded with a known embedding model
# rather than a library default.
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

# --- Chroma vector store ---------------------------------------------------
# On-disk Chroma database; the collection is created if it does not exist yet.
persist_dir = "./chroma_db2"
CHROMA_COLLECTION_NAME = "rag-collection"
client = PersistentClient(path=persist_dir)
collection = client.get_or_create_collection(CHROMA_COLLECTION_NAME)
vector_store = ChromaVectorStore(
    persist_dir=persist_dir,
    chroma_collection=collection,
)

# --- Index -----------------------------------------------------------------
# Docstore / index-store files are read from INDEX_DIR, vectors from Chroma.
INDEX_DIR = "./index"
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    persist_dir=INDEX_DIR,
)

# Pass the same embedding model while loading the index.
index = load_index_from_storage(storage_context, embed_model=embed_model)

Loading llama_index.core.storage.kvstore.simple_kvstore from ./index\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./index\index_store.json.


In [57]:
from sentence_transformers import CrossEncoder

# Cross-encoder that scores (query, passage) pairs; used to re-rank retrieved chunks.
RERANKER_MODEL_NAME = "cross-encoder/ms-marco-MiniLM-L-6-v2"
reranker = CrossEncoder(RERANKER_MODEL_NAME)

def rerank(query, docs):
    """Order ``docs`` from most to least relevant to ``query``.

    Each document is paired with the query and scored by the module-level
    ``reranker`` (its ``predict`` is expected to return one score per pair).

    Args:
        query: The user's question.
        docs: Iterable of candidate documents (passage texts).

    Returns:
        A new list containing the same documents sorted by descending score.
        Documents with equal scores keep their incoming (retrieval) order.
        An empty input yields an empty list without calling the model.
    """
    # Materialise once so one-shot iterables can be both scored and reordered.
    docs = list(docs)
    if not docs:
        # Nothing to score; avoid calling predict() on an empty batch.
        return []

    pairs = [[query, doc] for doc in docs]
    scores = reranker.predict(pairs)

    # Sort positions by score only. Sorting (score, doc) tuples would fall back
    # to comparing the documents themselves whenever two scores are equal.
    # sorted() is stable even with reverse=True, so ties keep retrieval order.
    ranked_positions = sorted(range(len(docs)), key=lambda i: scores[i], reverse=True)
    return [docs[i] for i in ranked_positions]

In [58]:
def generate_answer(query, similarity_top_k=10, rerank_top_n=3):
    """Answer ``query`` from the indexed documents and print the reply.

    Pipeline: retrieve ``similarity_top_k`` chunks from the module-level
    ``index``, re-rank them with ``rerank``, keep the best ``rerank_top_n``,
    embed those in the PU-Assistant prompt and send it to the module-level
    ``llm``.

    Args:
        query: The student's question.
        similarity_top_k: Number of chunks to fetch from the retriever
            (default 10).
        rerank_top_n: Number of re-ranked chunks to place in the prompt
            (default 3).

    Returns:
        None. The completion text is written to stdout via ``print``.

    Raises:
        ValueError: If ``similarity_top_k`` or ``rerank_top_n`` is less than 1.
    """
    if similarity_top_k < 1 or rerank_top_n < 1:
        # A negative slice bound would silently drop documents from the wrong end.
        raise ValueError("similarity_top_k and rerank_top_n must be positive integers")

    # Retrieve candidate chunks from the vector index
    retriever = index.as_retriever(similarity_top_k=similarity_top_k)
    nodes = retriever.retrieve(query)
    doc_texts = [node.get_content() for node in nodes]

    # Re-rank and keep the best few. When nothing was retrieved, skip the
    # re-ranker entirely and let the prompt's "answer not found" rule apply.
    top_docs = rerank(query, doc_texts)[:rerank_top_n] if doc_texts else []

    # Combine top docs into a single context string
    context = "\n\n---\n\n".join(top_docs)

    # Format the prompt
    prompt = f"""
    PU stands for Panjab University, Chandigarh, and you are PU-Assistant — a helpful and professional virtual guide for students of Panjab University and those seeking admission.
    
    You must strictly use only the information provided below to answer the student's question. Follow these rules carefully:
    
    1. If the answer involves steps, rules, eligibility, or form-related info — present it in **brief bullet points**.
    2. If a **URL or downloadable form** is available, mention it politely at the end.
    3. For simple factual or definition-type questions, reply with a **clear, concise sentence**.
    4. If a **web link or page** is useful to the question, include it directly in your answer.
    5. If the answer cannot be found, respond politely:
       - "**Sorry, I couldn't find that information. You may contact the university administration.**"
       - Or: "**Sorry, I couldn't help you with that. Please visit the official website for more details.**"
       Choose the most suitable one **without mentioning missing data or sources**.
    6. **Do NOT mention anything about “context”, “data not found in context”, or “context doesn’t mention”**. Just respond professionally as a helpdesk would.
    7. If both INR (₹) and USD ($) amounts are found, always show only the ₹ fee.
    8. Maintain a **formal, polite tone** in every response.
    9. At the end, suggest three follow-up questions as:
       **Know more about:** (list 3 bullet points, each max 5 words).
    10. Do not make up information. Use only what's provided.
    
    Information:
    {context}
    
    Question: {query}
    
    Answer:
    """

    response = llm.complete(prompt)
    print(response.text)

In [59]:
# Read the question interactively; leading/trailing whitespace carries no meaning.
query = input("Enter your query:\n").strip()

if query:
    generate_answer(query)
else:
    # Skip the retrieval + LLM round trip when nothing was typed.
    print("No query entered.")

Enter your query:
 What are the eligibility criteria for the MBA program?


The eligibility criteria for the MBA program at Panjab University are as follows:

* A Bachelor's Degree in any stream with not less than 50% marks in aggregate from a recognized University.
* Candidates must have appeared in CAT-2023, followed by appearance in Group Discussion and Personal Interview.
* 5% concession is admissible in eligibility marks to SC/ST/BC/PwD candidates.

Note: The above information is based on the provided Handbook of Information 2025.

Know more about:
• CAT-2023 requirements
• Group Discussion and Personal Interview process
• Eligibility marks for reserved categories
