In [2]:
import getpass
import hashlib
import json
import os

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader, WebBaseLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA, LLMChain, StuffDocumentsChain
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain.schema import Document

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# Collect every PDF that sits directly in the project data folder; each
# matching file is handed to PyPDFLoader by the DirectoryLoader.
pdf_loader = DirectoryLoader(
    r"C:\Major_Project\NITT_ChatBot\data",
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
pdf_docs = pdf_loader.load()

In [4]:
# Pull the institute's home page so that its text can be indexed alongside
# the PDFs.
web_loader = WebBaseLoader(web_path=["https://www.nitt.edu/"])
web_docs = web_loader.load()

In [5]:
# One splitter instance is shared by both sources so that PDF and web text
# are chunked with the same size / overlap settings.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
pdf_chunks = text_splitter.split_documents(pdf_docs)
web_chunks = text_splitter.split_documents(web_docs)

# Single combined list that is later embedded into the document store.
all_chunks = [*pdf_chunks, *web_chunks]

In [6]:
# Read the FAQ file. The encoding is given explicitly because JSON text is
# UTF-8, whereas open() would otherwise decode with the platform default
# (cp1252 on a typical Windows setup) and corrupt non-ASCII characters.
with open(r"C:\Major_Project\NITT_ChatBot\data\FAQs.json", "r", encoding="utf-8") as file:
    faqs = json.load(file)

# Convert FAQs into Documents.
# Each entry contributes one Document built from its first pattern and its
# first response, tagged with the entry's "tag" value.
faq_docs = [
    Document(
        page_content=f"Question: {faq['patterns'][0]}\nAnswer: {faq['responses'][0]}",
        metadata={"tag": faq["tag"]},
    )
    for faq in faqs
    # An entry without any pattern or without any response cannot form a
    # question/answer pair; skip it instead of failing on the [0] lookup.
    if faq.get("patterns") and faq.get("responses")
]

In [7]:
def _stable_ids(docs, prefix):
    """Build deterministic, unique Chroma ids for ``docs``.

    Each id is derived from a SHA-256 digest of the document text, so the same
    chunk gets the same id on every run. A per-digest counter keeps ids unique
    when the same text occurs more than once in ``docs``.

    Args:
        docs: Sequence of objects exposing a ``page_content`` string.
        prefix: Short label placed in front of every id.

    Returns:
        A list of id strings, one per element of ``docs``, in the same order.
    """
    seen = {}
    ids = []
    for doc in docs:
        digest = hashlib.sha256(doc.page_content.encode("utf-8")).hexdigest()
        occurrence = seen.get(digest, 0)
        seen[digest] = occurrence + 1
        ids.append(f"{prefix}-{digest}-{occurrence}")
    return ids


embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")

# Store PDFs & Website Data in ChromaDB.
# Explicit, content-derived ids make this cell safe to re-run: without them
# Chroma assigns fresh random ids and every execution appends another copy of
# each chunk to the persisted collection, so searches return duplicates.
# A store created before this change still holds its randomly-keyed entries;
# delete the directory once to start from a clean collection.
vector_db = Chroma.from_documents(
    all_chunks,
    embeddings,
    ids=_stable_ids(all_chunks, "doc"),
    persist_directory="./chroma_db",
)
# NOTE(review): the recorded cell output shows a warning raised on this call,
# presumably Chroma's notice that stores are now persisted automatically.
# The call is kept so that older Chroma versions still flush to disk.
vector_db.persist()

# Store FAQs in ChromaDB (same re-run protection as above).
faq_vector_db = Chroma.from_documents(
    faq_docs,
    embeddings,
    ids=_stable_ids(faq_docs, "faq"),
    persist_directory="./faq_db",
)
faq_vector_db.persist()

# Create Retrievers: top 5 chunks from the document store, top 3 FAQ entries.
retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
faq_retriever = faq_vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})

  embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
  from .autonotebook import tqdm as notebook_tqdm
  vector_db.persist()


In [8]:
# Credentials must not live in the notebook. Use GROQ_API_KEY from the
# environment when it is already set; otherwise ask for it interactively
# without echoing it, and store it only in this process's environment.
# SECURITY: the key that was previously hardcoded in this cell has been
# exposed and should be revoked in the Groq console.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")
llm = ChatGroq(temperature=0.4, max_tokens=500, streaming=True)

# Define Prompt for Structured Responses.
# The template expects three variables: faq_context, document_context and
# question.
prompt_template = PromptTemplate.from_template("""
You are a helpful AI assistant for NIT Trichy. Answer the user's question using the most relevant information from FAQs and retrieved documents.

### FAQs Context:
{faq_context}

### Retrieved Documents:
{document_context}

### User Question:
{question}

### Answer:
""")


# Chain that fills the template and sends it to the model. The chat loop reads
# the generated answer from the "text" key of the chain's result.
llm_chain = LLMChain(llm=llm, prompt=prompt_template)

  llm_chain = LLMChain(llm=llm, prompt=prompt_template)


In [9]:
def retrieve_faq(user_question):
    """Return the text of the FAQ entry most similar to ``user_question``.

    Args:
        user_question: The question typed by the user.

    Returns:
        The ``page_content`` of the first document returned by
        ``faq_retriever``, or the string ``"No relevant FAQ found"`` when the
        retriever returns nothing.
    """
    # ``invoke`` replaces ``get_relevant_documents``, which is deprecated and
    # emits a warning on every call (visible in this notebook's output).
    similar_faqs = faq_retriever.invoke(user_question)
    if not similar_faqs:
        return "No relevant FAQ found"  # Default if no FAQ matches
    return similar_faqs[0].page_content  # Closest FAQ comes first

def chatbot_response(user_question):
    """Answer ``user_question`` from the closest FAQ plus retrieved documents.

    The closest FAQ (via ``retrieve_faq``) and the chunks returned by
    ``retriever`` are passed, together with the question, to ``llm_chain``.

    Args:
        user_question: The question typed by the user.

    Returns:
        Whatever ``llm_chain.invoke`` returns; the chat loop reads the
        generated answer from its ``"text"`` key.
    """
    retrieved_faq = retrieve_faq(user_question)
    # ``invoke`` replaces the deprecated ``get_relevant_documents``.
    retrieved_docs = retriever.invoke(user_question)

    # Joined once here; the chain formats the prompt itself, so there is no
    # need to format the template by hand beforehand.
    document_context = "\n\n".join(doc.page_content for doc in retrieved_docs)

    return llm_chain.invoke({
        "faq_context": retrieved_faq or "No relevant FAQ found.",
        "document_context": document_context if retrieved_docs else "No relevant document found.",
        "question": user_question,
    })

In [10]:

# Interactive chat: keep answering until the user types exit, quit or bye
# (case-insensitive). The question is echoed so that it stays visible in the
# saved cell output next to the answer.
while True:
    user_input = input("You: ")
    if user_input.lower() not in {"exit", "quit", "bye"}:
        bot_response = chatbot_response(user_input)
        print("You:", user_input)
        print("Chatbot:", bot_response["text"])
        continue
    print("Chatbot: Goodbye!")
    break

  similar_faqs = faq_retriever.get_relevant_documents(user_question)


You: tell me about yourself
Chatbot: I am an AI assistant created by Sweety Mahale for her final year college project. I am here to help answer questions and provide information relevant to NIT Trichy.
You: tell me about yourself
Chatbot: I am an AI assistant created by Sweety Mahale for her final year college project. I am here to help answer questions and provide information relevant to NIT Trichy.
You: tell me about NIT Trichy
Chatbot: National Institute of Technology (NIT) Trichy is located in the Tanjore Main Road, in National Highway 83, between BHEL and Thuvakudi, Tiruchirappalli. It is a public engineering institution of national importance and one of the 31 NITs in India. NIT Trichy offers undergraduate and postgraduate programs in various fields of engineering and technology, including Metallurgical and Materials Engineering, Electrical and Electronics Engineering, Mechanical Engineering, and Civil Engineering, among others. The institute also provides opportunities for stude

In [11]:
# Second chat session. Reads questions until one of the stop words
# (exit / quit / bye, in any letter case) is entered, then says goodbye.
while (user_input := input("You: ")).lower() not in ("exit", "quit", "bye"):
    bot_response = chatbot_response(user_input)
    print("You:", user_input)
    print("Chatbot:", bot_response["text"])
print("Chatbot: Goodbye!")
    

You: tell me some recent acivement of nit trichy
Chatbot: Sure, here are some recent achievements of NIT Trichy:

1. NIT Trichy has been ranked 11th among the top engineering institutions in India by the National Institutional Ranking Framework (NIRF) 2021.

2. The institute has been awarded the 'Star Status' by the Department of Science and Technology (DST) under the FIST (Fund for Improvement of S&T Infrastructure) program.

3. NIT Trichy has been selected as a mentor institution under the MHRD's (Ministry of Human Resource Development) "NIT excellence" initiative.

4. The institute has been granted an autonomous status by the Ministry of Human Resource Development (MHRD) for a period of 10 years.

5. NIT Trichy has been recognized as an Institution of Eminence (IoE) by the University Grants Commission (UGC).

6. The institute has been awarded the status of "Centre of Relevance and Excellence" (CORE) in the area of "Energy" by the MHRD.

7. NIT Trichy has been selected as a "Smart Ca