In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
from langchain_community.document_loaders import DataFrameLoader ,WebBaseLoader   
import pandas as pd

In [2]:
#Loading environment variables from .env
load_dotenv(override=True)

True

##  Load data

In [3]:

def load_data_from_urls(file_path):

    # Step 1: Open the file
    with open(file_path, 'r') as file:
        # Step 2: Read the lines
        lines = file.readlines()

    # Step 3: Strip newline characters and store the values as a list
    urls = [line.strip() for line in lines]

    loader = WebBaseLoader(urls)
    return loader.load()


In [4]:
def load_data_from_dataframe(file_path):
    df = pd.read_csv(file_path)
    loader = DataFrameLoader(df, page_content_column="document")
    return loader.load()

In [62]:
#docs=load_data_from_urls("web_urls.txt")
docs=load_data_from_dataframe("../Experiments/datasets/ingestion/preprocessed/combined_faqs_preprocessed.csv")

In [63]:
# split the docs into chunks using recursive character splitter
def split_docs(documents,chunk_size=1500,chunk_overlap=200,type='csv'):
  separators=None
  if type=="web":
    print("Type is Web...")
    separators=["\n\n\n","\n\n","\n","(?<=\.)",""," "]

  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap,separators=separators)
  chunks = text_splitter.split_documents(documents)
  return chunks

# store the splitte documnets in docs variable
chunks = split_docs(documents=docs,chunk_size=1500,chunk_overlap=200)

In [64]:
print(len(chunks))

1010


## Creating Embeddings and Storing into ChromaDB

In [65]:
embeddings_openai = OpenAIEmbeddings(model="text-embedding-3-large")

In [66]:
# using chromadb as a vectorstore and storing the docs in it

db = Chroma.from_documents(collection_name="faqs",documents=chunks, embedding=embeddings_openai,persist_directory="./chroma_db")

In [81]:
vectordb=Chroma(
            collection_name="faqs",
            persist_directory="./chroma_db",
            embedding_function=embeddings_openai


        )

In [82]:
results = vectordb.similarity_search("Admission requirements for MA History Program?", k=2)
print(results[0].page_content)

What are the admission requirements?
                        








Students are admitted on the basis of academic achievement (grades) only. Additional non-academic requirements are not required for admission consideration. A minimum grade of 70% or higher is required in Grade 12 U English/Anglais (ENG4U/EAE4U preferred).


Learn more about History (BA (Hons))













                            How do I apply?
                        








All admissions to the History BA program are administered by Ryerson’s Admissions and Recruitment Office. 











                            How much does it cost to study at TMU?
                        








Current fees and financial information have been posted on the Office of the Registrar website.


View fees by program













                            How do I transfer to TMU from another university or college?
                        








For information on transferring to Ryerson, please talk to our Undergrad

## Loading LLM

In [68]:
#Initializing OpenAI API
llm = ChatOpenAI( model="gpt-4o-mini",
    temperature=.1,
    max_tokens=500,
    verbose=True,
    model_kwargs={"top_p":0.5}
    
    )

## RAG Chain

In [69]:
template = """SYSTEM:You are an intelligent assistant helping Toronto Metropolitan University Website visitors on their frequently asked questions in English.

Question: {question}

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
-if the answer to the question cannot be determined from the context alone or if the context is empty,
say "I cannot determine the answer to that"
-Use numbered lists when possible

Always finish with a new line that says "\nYours Truly - UniQBot 1.0"
=============
{context}
=============

Question: {question}

Helpful Answer:"""

In [70]:
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )

In [71]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain

memory = ConversationBufferWindowMemory(
    k=3,
    memory_key="chat_history",
    return_messages=True,
    output_key='answer'
)

In [83]:
#retriever = db.as_retriever(search_kwargs={"k": 3,"score_threshold":.10}, search_type="similarity_score_threshold")
retriever = db.as_retriever(search_kwargs={"k": 3}, search_type="similarity")


In [73]:
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
    return_generated_question = True,
    verbose=False,
    chain_type="stuff",
    combine_docs_chain_kwargs={'prompt': prompt},
)

In [74]:
def ask(question):

    phrases_to_check = ["i cannot determine the answer to that", "i do not know the answer to that", "i can help you with a variety of tasks", "i am uniq-bot 1.0"]

    result = qa({"question": question})

    # output=result["answer"].strip()
    # # clean_output=cleanhtml(output)
    print(result["answer"])

    if any(phrase in result["answer"].lower() for phrase in phrases_to_check):
        print('\nPlease ask a relevant question to UniQ-Bot.')
    else:
        print("\nView this link for more information: ",result["source_documents"][0].metadata['question_url'])



In [76]:
ask("What are the admission requirements for BA History?")

I cannot determine the answer to that.

Yours Truly - UniQBot 1.0

Please ask a relevant question to UniQ-Bot.


In [77]:
ask("Is there any scholarships available for the masters program?")

1. Yes, there are scholarships available for graduate students, including those in master's programs.
2. For more details on the specific scholarships and awards opportunities, you should visit the Graduate Studies Scholarships and Awards page.

Yours Truly - UniQBot 1.0

View this link for more information:  https://www.torontomu.ca/tedrogersschool/master-science-management/admissions/faqs/#accordion-content-1571336414548-are-there-any-scholarships-available-for-the-programs-


In [78]:
ask("What are the admission requirements for Masters in Nursing program?")

The admission requirements for the Masters in Nursing program at Toronto Metropolitan University are as follows:

1. An overall B standing in your undergraduate degree is the minimum required for admission.
2. Specific admissions requirements may vary depending on the program stream:
   - MN Course Stream Program Requirements
   - MN Thesis Stream Program Requirements
   - Combined MN/PHCNP Certificate Program Requirements
   - PHCNP Certificate Program Requirements

Please note that admission to all programs is competitive, and meeting the minimum requirements does not guarantee acceptance.

Yours Truly - UniQBot 1.0


KeyError: 'question_url'

In [79]:
ask("is there any placements facility for nursing program?")

1. Yes, there is a placement facility for the nursing program.
2. The Post Diploma Degree Nursing Program offers opportunities for placements in specialty areas such as critical care, intensive care, emergency, labour and delivery, or pediatrics.
3. However, these specialty placements are limited in number and follow a competitive process set by the placement agency.
4. Nursing practice placements may also involve working with diverse populations who experience various barriers.

Yours Truly - UniQBot 1.0

View this link for more information:  https://www.torontomu.ca/nursing/central-placement-office/faqs/post-diploma-program-placement-frequently-asked-questions/#accordion-content-1620292826842-will-i-be-able-to-request-a-placement-in-an-area-of-speciality-


In [80]:
ask("How do I get graduation ceremony tickets?")

To obtain tickets for the graduation ceremony, follow these steps:

1. **Register for your convocation ceremony**: You must request tickets during the registration process.
2. **Receive guest tickets**: Each graduate will receive guest tickets, each with a unique barcode. The number of guest tickets will be either 3 or 4, depending on how many graduates are in the ceremony.
3. **Distribute tickets**: You can either send the tickets directly to your guests via email or download and distribute them manually.
4. **Ensure unique tickets**: Each ticket can only be scanned once, so make sure each of your guests has a unique ticket (either on a mobile device or a printed copy) for entry into the Convocation Arena.
5. **Note on extra tickets**: Extra tickets for seating are not available for any convocation ceremony.

Yours Truly - UniQBot 1.0

View this link for more information:  https://www.torontomu.ca/convocation/faq/


In [51]:
ask("Do children need tickets to attend the ceremony?")

1. Anyone older than five years will need their own ticket.
2. Younger children can attend without a ticket but must sit in a guest's lap.
3. It is recommended that guests with young children sit near exits in case they need to step outside during the ceremony.

Yours Truly - UniQBot 1.0

View this link for more information:  https://www.torontomu.ca/convocation/faq/


In [117]:
ask("What time graduation ceremony takes place?")

The graduation ceremonies take place at two different times:

1. 9:30 a.m. (arrive by 8:00 a.m.)
2. 3:30 p.m. (arrive by 2:00 p.m.)


In [119]:
ask("can we wear hats?")

Only PhD graduates wear mortarboards/hats as part of their regalia.
