In [131]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
from langchain_community.document_loaders import DataFrameLoader ,WebBaseLoader   
import pandas as pd

In [18]:
# Load environment variables from a local .env file.
# NOTE(review): presumably this is where the OpenAI API key comes from — no key
# is set anywhere else in this notebook; confirm.
load_dotenv()

True

## Load data

In [4]:

def load_data_from_urls(file_path):
    """Load the web pages listed in a text file, one URL per line.

    Args:
        file_path: Path to a text file containing one URL per line.
            Surrounding whitespace is stripped and blank lines are ignored.

    Returns:
        The result of ``WebBaseLoader(urls).load()`` for the URLs found.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        # Skip blank lines (e.g. a trailing newline at the end of the file);
        # otherwise they would be handed to the loader as empty URLs.
        urls = [url for url in stripped_lines if url]

    loader = WebBaseLoader(urls)
    return loader.load()


In [126]:
def load_data_from_dataframe(file_path):
    """Read a CSV file and load it through ``DataFrameLoader``.

    Args:
        file_path: Path of the CSV file to read with pandas.

    Returns:
        The result of ``DataFrameLoader(...).load()``, with the ``document``
        column used as the page content.
    """
    frame = pd.read_csv(file_path)
    return DataFrameLoader(frame, page_content_column="document").load()

In [127]:
# Alternative source: load the pages listed in a URL file instead of the CSV.
#docs=load_data_from_urls("web_urls.txt")
# Load the preprocessed FAQ CSV (page content comes from its "document" column).
docs=load_data_from_dataframe("../RnD/datasets/ingestion/preprocessed/combined_faqs_preprocessed.csv")

In [129]:
# split the docs into chunks using recursive character splitter
def split_docs(documents,chunk_size=1500,chunk_overlap=200,type='csv'):
    """Split documents into overlapping chunks with a recursive character splitter.

    Args:
        documents: Documents to split; passed to ``split_documents``.
        chunk_size: Chunk size handed to the splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.
        type: Kind of source. ``"web"`` enables a custom separator list tuned
            for scraped pages; any other value passes ``separators=None`` so
            the splitter falls back to its own defaults.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    separators = None
    use_regex_separators = False
    if type == "web":
        print("Type is Web...")
        # Ordered from coarse to fine. The empty string must come last: it
        # matches everywhere, so anything listed after it is never tried.
        # The sentence boundary is a regex lookbehind, so it is written as a
        # raw string and the splitter is told to treat separators as regexes;
        # otherwise it would be escaped and matched literally.
        separators = ["\n\n\n", "\n\n", "\n", r"(?<=\.)", " ", ""]
        use_regex_separators = True

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=separators,
        is_separator_regex=use_regex_separators,
    )
    return text_splitter.split_documents(documents)

# Split the loaded documents and keep the result in `chunks`
# (type defaults to 'csv', so no custom separators are passed).
chunks = split_docs(documents=docs,chunk_size=1500,chunk_overlap=200)

In [130]:
print(len(chunks))

1010


## Creating Embeddings and Storing into ChromaDB

In [104]:
# Embedding model handed to the Chroma vector store below.
embeddings_openai = OpenAIEmbeddings(model="text-embedding-3-large")

In [105]:
# Embed the chunks and store them in the "faqs" collection of a Chroma
# vector store persisted under ./chroma_db.
# NOTE(review): because the store is persisted on disk, re-running this cell
# presumably adds the same chunks to the existing collection again (duplicate
# hits, repeated embedding cost) — confirm, and clear ./chroma_db or reload
# the existing collection before re-indexing.

db = Chroma.from_documents(collection_name="faqs",documents=chunks, embedding=embeddings_openai,persist_directory="./chroma_db")

In [106]:
# Sanity check: fetch 2 chunks for a sample question and show the first one.
results = db.similarity_search("Admission requirements for MA History Program?", k=2)
print(results[0].page_content)

What are the admission requirements?
                        








Students are admitted on the basis of academic achievement (grades) only. Additional non-academic requirements are not required for admission consideration. A minimum grade of 70% or higher is required in Grade 12 U English/Anglais (ENG4U/EAE4U preferred).


Learn more about History (BA (Hons))













                            How do I apply?
                        








All admissions to the History BA program are administered by Ryerson’s Admissions and Recruitment Office. 











                            How much does it cost to study at TMU?
                        








Current fees and financial information have been posted on the Office of the Registrar website.


View fees by program













                            How do I transfer to TMU from another university or college?
                        








For information on transferring to Ryerson, please talk to our Undergrad

## Loading LLM

In [132]:
# Chat model used by the RAG chain: low temperature, max_tokens set to 500.
# NOTE(review): top_p is passed through model_kwargs; newer langchain_openai
# releases may expect it as an explicit top_p= argument — confirm against the
# installed version.
llm = ChatOpenAI( model="gpt-4o-mini",
    temperature=.1,
    max_tokens=500,
    verbose=True,
    model_kwargs={"top_p":0.5}
    
    )

## RAG Chain

In [108]:
# Prompt text for the answer-generation step. The {context} and {question}
# placeholders are the input variables declared on the PromptTemplate.
template = """SYSTEM:You are an intelligent assistant helping Toronto Metropolitan University Website visitors on their frequently asked questions in English.

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
-if the answer to the question cannot be determined from the context alone or if the context is empty,
only say "I cannot determine the answer to that"
-Use numbered lists when possible

Context:
=============
{context}
=============

Question: {question}

Helpful Answer:"""

In [109]:
from langchain.prompts import PromptTemplate

# Wrap the template text in a PromptTemplate with "context" and "question"
# as its input variables.
prompt = PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )

In [110]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain

# Conversation memory for the chain, exposed under the "chat_history" key.
# k=3 — presumably keeps only the 3 most recent exchanges; verify against the
# langchain docs.
# output_key='answer' names which chain output is recorded (the chain is also
# configured to return source documents and the generated question).
memory = ConversationBufferWindowMemory(
    k=3,
    memory_key="chat_history",
    return_messages=True,
    output_key='answer'
)

In [111]:
# Alternative kept for reference: retrieval filtered by a similarity score threshold.
#retriever = db.as_retriever(search_kwargs={"k": 3,"score_threshold":.10}, search_type="similarity_score_threshold")
# Active retriever: plain similarity search with k=3.
retriever = db.as_retriever(search_kwargs={"k": 3}, search_type="similarity")


In [112]:
# Conversational RAG chain wired from the pieces defined above: `llm`,
# `retriever`, `memory`, and the custom `prompt` for the combine-docs step.
# Besides the answer, it is configured to return the source documents and the
# generated question.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
    return_generated_question = True,
    verbose=False,
    chain_type="stuff",
    combine_docs_chain_kwargs={'prompt': prompt},
)

In [113]:
def ask(question):
    """Send a question through the conversational RAG chain and print the answer.

    Args:
        question: The user's question.

    Returns:
        None. The chain's ``"answer"`` output is printed to stdout.
    """
    # invoke() replaces calling the chain object directly (qa({...})), which
    # LangChain has deprecated.
    result = qa.invoke({"question": question})
    print(result["answer"])

    # TODO: optionally follow the answer with a source link taken from
    # result["source_documents"], or with a fallback message when the model
    # says it cannot determine the answer.



In [124]:
ask("What are the admission requirements for BA History?")

I cannot determine the answer to that.


In [115]:
ask("Is there any scholarships available for the masters program?")

Yes, there are scholarships available for the master's program. For details on scholarships and awards opportunities available to graduate students, please visit the Graduate Studies Scholarships and Awards page.


In [116]:
ask("What are the admission requirements for Masters in Nursing program?")

The admission requirements for the Masters in Nursing (MN) program at Toronto Metropolitan University vary depending on the specific stream. Here are the different streams and their respective requirements:

1. MN Course Stream Program Requirements
2. MN Thesis Stream Program Requirements
3. Combined MN/PHCNP Certificate Program Requirements
4. PHCNP Certificate Program Requirements

Please note that admission to all programs is competitive, and meeting the minimum requirements does not guarantee acceptance.


In [125]:
ask("is there any placements facility for nursing program?")

Yes, there are placement facilities for the nursing program. Here are the key points:

1. The Post Diploma Degree Nursing Program includes placements to ensure students meet entry-to-practice competencies prior to graduation.
2. Students may have the opportunity to complete placements in specialty areas such as critical care, intensive care, emergency, labour and delivery, or pediatrics. However, these placements are limited and follow a competitive process set by the placement agency.
3. The placement site will assign a preceptor to the student, and students cannot find their own preceptor.


In [122]:
ask("How do I get graduation ceremony tickets?")

To obtain graduation ceremony tickets, you must request them when you register for your convocation ceremony. Here are the steps:

1. Request tickets during your convocation ceremony registration.
2. Each graduate will receive 3 or 4 guest tickets, each with a unique barcode.
3. Tickets can be sent directly to guests via email or downloaded and distributed manually.
4. Ensure each guest has a unique ticket (either on a mobile device or a printed copy) for entry into the Convocation Arena.

Note: Extra tickets for seating are not available for any convocation ceremony.


In [118]:
ask("Do children need tickets to attend the ceremony?")

1. Anyone older than five years will need their own ticket.
2. Younger children can attend without a ticket but must sit in a guest's lap.


In [117]:
ask("What time graduation ceremony takes place?")

The graduation ceremonies take place at two different times:

1. 9:30 a.m. (arrive by 8:00 a.m.)
2. 3:30 p.m. (arrive by 2:00 p.m.)


In [119]:
ask("can we wear hats?")

Only PhD graduates wear mortarboards/hats as part of their regalia.
