In [7]:
from langchain.document_loaders import UnstructuredURLLoader


def get_mu_data(urls=None):
    """Fetch Metropolitan University web pages and return them as documents.

    Args:
        urls: Optional iterable of page URLs to load. When omitted, the
            university home, contact, and history pages are loaded.

    Returns:
        Whatever ``UnstructuredURLLoader.load()`` returns for the given URLs
        (the raw, unsplit documents).
    """
    if urls is None:
        urls = [
            "https://www.metrouni.edu.bd/",
            "https://www.metrouni.edu.bd/sites/university/contact",
            "https://www.metrouni.edu.bd/sites/university/history",
        ]

    # Materialize to a list so any iterable (tuple, generator, ...) is accepted.
    loader = UnstructuredURLLoader(urls=list(urls))
    raw_docs = loader.load()
    return raw_docs

In [10]:
# Fetch the pages once and keep the raw documents for the splitting step.
mu_data = get_mu_data()

In [15]:
#Split the Data into Text Chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
def text_split(mu_data, chunk_size=1000, chunk_overlap=200):
    """Split loaded documents into overlapping text chunks.

    Args:
        mu_data: Documents to split, as returned by ``get_mu_data()``.
        chunk_size: Maximum size of each chunk, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.
        chunk_overlap: Overlap between consecutive chunks, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 200.

    Returns:
        The chunked documents produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    text_chunks = text_splitter.split_documents(mu_data)
    return text_chunks

In [16]:
# Chunk the raw documents and report how many chunks were produced.
text_chunks = text_split(mu_data)
print(
    "Length of Text Chunks",
    len(text_chunks),
)

Length of Text Chunks 15


In [23]:
#Download the Embeddings from Hugging Face
from langchain.embeddings import HuggingFaceEmbeddings
def download_hugging_face_embeddings():
    """Create the embedding model used to vectorize the text chunks.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured for
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    return HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
    )

# Shared embedding model: used both when writing chunks to the vector store
# and when reconnecting to the existing index for retrieval.
embeddings = download_hugging_face_embeddings()

  embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')


In [19]:

from dotenv import load_dotenv
import os
# Load environment variables (the API keys read in the next cell) —
# presumably from a local .env file; keep that file out of version control.
load_dotenv()

True

In [20]:
# Read the service credentials from the process environment (never hard-code them).
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Fail fast with an actionable message. Writing a missing (None) value back
# into os.environ would raise an opaque "TypeError: str expected, not NoneType",
# and writing a present value back is a no-op, so no re-assignment is done here.
_missing_keys = [
    name
    for name, value in (
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("GROQ_API_KEY", GROQ_API_KEY),
    )
    if not value
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Set them in your .env file or shell before running this notebook."
    )

In [21]:
# Step 3: Initialize Pinecone
from pinecone import Pinecone

# Alias of the key read from the environment earlier in the notebook.
pinecone_api_key = PINECONE_API_KEY

# Pinecone client; used below to check for, create, and open the index.
pc = Pinecone(api_key=pinecone_api_key)

In [22]:
from pinecone import ServerlessSpec

index_name = "mu-chatbot"

# Create the serverless index only when it does not exist yet, so re-running
# this cell reuses the existing index instead of failing.
# NOTE(review): `dimension` must equal the embedding model's vector size;
# 384 is presumably chosen for all-MiniLM-L6-v2 — confirm if the model changes.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        metric="cosine",
        dimension=384,
        spec=ServerlessSpec(region="us-east-1", cloud="aws"),
    )

# Handle to the (new or pre-existing) index.
index = pc.Index(index_name)

  from .autonotebook import tqdm as notebook_tqdm


In [25]:
# Embed each chunk and upsert the embeddings into your Pinecone index.
from langchain_pinecone import PineconeVectorStore

import hashlib

# Deterministic per-chunk ids (hash of source URL + chunk text) make this cell
# idempotent: re-running it overwrites the same vectors. Without explicit ids
# the store generates fresh ones on every call (see the UUID-style id in the
# search output), so each re-run would add another copy of every chunk.
chunk_ids = [
    hashlib.sha256(
        f"{doc.metadata.get('source', '')}\n{doc.page_content}".encode("utf-8")
    ).hexdigest()
    for doc in text_chunks
]

docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings,
    ids=chunk_ids,
)

In [26]:
# Sanity check: fetch the three closest chunks for a sample question.
docsearch.similarity_search(
    "what is the name of metropolitan university Founder",
    k=3,
)


[Document(id='418be61a-cd9a-4a6e-9928-cb897f3bfd0e', metadata={'source': 'https://www.metrouni.edu.bd/sites/university/history'}, page_content='With the approval of the Ministry of Education under the Private University Act of 1992 (which was amended first in 1998 and then again in 2010), Metropolitan University came into being on 3rd May 2003.\n\nDr. Toufique Rahman Chowdhury, an educationist and a promising entrepreneur of the country initiated the idea of founding Metropolitan University with the direct participation and assistance of a few of his close friends, acquaintances, and associates.\n\n“2018” Celebrating Fifteen Years of Success in Quality Education\n\n“2017” Inauguration of Permanent Campus\n\n“2016” Celebrated thirteen years of Excellence in Education\n\n“2015” Second Convocation of the University\n\n“2014” Membership of the Accreditation Service for International Schools, Colleges & Universities\n\n“2013” Celebrated Ten years of Serving the country through ensuring qual

In [27]:
# Reconnect to the existing index.
# Nothing is embedded or upserted here; `embeddings` is passed so the store
# can embed incoming queries.
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [28]:
# Similarity retriever returning the top 3 chunks per query.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [30]:
# Spot-check the retriever: show the text of the second result (index 1).
res = retriever.invoke(
    "what is the name of metropolitan university Founder?"
)
res[1].page_content

"Message From The Vice Chancellor\n\n\n\nProfessor Dr. Mohammad Jahirul Hoque\n\nMetropolitan University was established in 2003 with the mission of providing quality tertiary education at an affordable cost. Located, far away from the nation's capital, in the holy city of Sylhet, the university has been engaged in shaping up the future citizens of the country, in grooming them as 'whole' human beings imbued with values, love for their country and roots, and equipped with proper knowledge and expertise in related fields. 21 years is not a great stretch of time to reach any peak but we are determined to make it. We pledge to go ahead with the vision of making Metropolitan University an enviable seat of academic excellence.\n\nUseful Link & Form\n\nLeave Application Record: Link\n\nStation Leave Form: Downloadable PDF\n\nOnline Admission Form: Link\n\nSCHOOL OF SCIENCE & TECHNOLOGY"

In [32]:
from langchain_groq import ChatGroq
# Chat model served by Groq; the API key is read from the environment.
model = ChatGroq(
    model="llama3-70b-8192",
    temperature=0.3,
    api_key=os.getenv("GROQ_API_KEY"),
)

In [33]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [34]:
# System message for the chat prompt template. `{context}` is the only
# template placeholder in this text. The prompt is one triple-quoted string,
# so sentences carry no per-line quote marks; any such marks would be sent to
# the model verbatim. The quotes around the example questions are intentional.
system_prompt = """
    You are MU Chatbot, the official virtual assistant for Metropolitan University Bangladesh.
    Your role is to provide accurate, helpful, and friendly responses to students, faculty, staff, and visitors. You are knowledgeable about the university's academic programs, departments, admission process, tuition fees, campus facilities, events, and policies.
     You must:
        - Use a polite, professional, and encouraging tone.
        - Keep responses concise but informative.
        - If unsure about an answer, suggest contacting the university administration directly.
        - Avoid giving legal, medical, or financial advice.
    University details:
        - Name: Metropolitan University Bangladesh
        - Location: Sylhet, Bangladesh
        - Website: https://metrouni.edu.bd/
        - Programs offered: Undergraduate and Postgraduate (Engineering, Business, Law, English, etc.)
        - Contact: info@metrouni.edu.bd | +880 821 720303

    Examples of tasks you can assist with:
        - "How can I apply for admission?"
        - "What are the tuition fees for Computer Science?"
        - "Where is the academic calendar?"
        - "Tell me about hostel facilities."
        - "What are the office hours of the registrar?"

    Always stay respectful and helpful, representing the values of Metropolitan University.

    {context}
"""

In [35]:

# Two-message chat template: the system instructions followed by the user's
# question, which is substituted for `{input}`.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [36]:
# Answering step: the model driven by the chat prompt.
question_answer_chain = create_stuff_documents_chain(
    model,
    prompt,
)
# Full RAG pipeline: the retriever feeding the answering step.
rag_chain = create_retrieval_chain(
    retriever,
    question_answer_chain,
)

In [37]:
# Sample question about a person named in the indexed history page.
response = rag_chain.invoke(
    {"input": "who is Dr. Toufique Rahman Chowdhury?"}
)
print(response["answer"])

Dr. Toufique Rahman Chowdhury is the Founder of Metropolitan University Bangladesh. He is also the Chairman Emeritus of the university.


In [40]:
# Second sample question, asking about a different person by name.
response = rag_chain.invoke(
    {"input": "who is Razib Dash"}
)
print(response["answer"])

I'm not aware of any information about a person named Razib Dash being associated with Metropolitan University Bangladesh. It's possible that Razib Dash is a student, faculty member, or staff of the university, but I don't have any specific information about them.

If you could provide more context or details about who Razib Dash is or what they are related to, I may be able to help you better. Alternatively, you can contact the university administration directly at info@metrouni.edu.bd or +880 821 720303 to inquire about Razib Dash.
