# RAG

In [None]:
# To run mistral:
# 1) "ollama serve" on one tab in terminal
# 2) "ollama run mistral" on another tab in terminal

In [None]:
# !pip install -r requirements.txt

In [None]:
import fitz
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import Ollama
from langchain.chains import RetrievalQA

## Chunking and Embedding

In [None]:
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    full_text = ""
    for page in doc:
        text = page.get_text("text")  # Avoids layout artifacts
        full_text += text + "\n"
    return full_text

text = extract_text_from_pdf("student_handbook.pdf")
with open("student_handbook.txt", "w", encoding="utf-8") as f:
    f.write(text)


In [None]:
loader = TextLoader("student_handbook.txt")
docs = loader.load()

splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(docs)

embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embedding)
vectorstore.save_local("faiss_student_handbook")


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




## Retrieval

In [11]:
llm = Ollama(model="mistral")
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
query = "What are the requirements to complete the MGS degree?"
answer = qa_chain.run(query)
print(answer)


 To complete the Management Science (MGS) degree, a student must meet the following requirements:

1. Complete a total of 130-132 credit hours.
2. Fulfill the University Core requirement which consists of 8 credit hours (3 courses).
3. Complete the University Distribution requirement by taking courses in both In Group and Out-Group, totaling 14 credit hours (4 courses).
4. Finish the MGSHSS School Core Curriculum which requires 9 credit hours (3 courses).
5. Complete Free Electives as needed to reach the minimum of 130 credit hours.
6. Fulfill the Major Requirement, including:
   a. Business Core courses
   b. Major Cores courses
   c. Major Electives courses (which can be completed in combinations of 5 MGS electives or 4 MGS electives + Internship)
7. Declare their major by the end of their sophomore year.
8. Students graduating with a BSc (Hons) degree will also need to complete any additional requirements specific to their chosen major, such as the university core and distribution i