## Hands-on Practice RAG

In [None]:
# step 1: load pdf
from langchain.document_loaders import PyPDFLoader
def load_pdf(pdf_path):
    """Load the PDF at ``pdf_path`` using ``PyPDFLoader``.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PyPDFLoader``.

    Returns:
        The result of ``PyPDFLoader(pdf_path).load()``.
    """
    return PyPDFLoader(pdf_path).load()

# step 2: split pdf into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
def split_chunks(documents, *, chunk_size=1000, chunk_overlap=100):
    """Split documents into overlapping chunks for embedding.

    Args:
        documents: Documents to split, handed to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Target maximum chunk length as measured by ``len``.
            Defaults to 1000, the value previously hard-coded here.
        chunk_overlap: Length shared between neighbouring chunks, measured
            the same way. Defaults to 100, the value previously hard-coded.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        # Sizes are measured with the built-in len, not with a tokenizer.
        length_function=len,
    )
    return text_splitter.split_documents(documents)

# step 3: initialize vector store
import os
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
def get_vector_store(pdf_path, *, persist_directory="./chroma_db"):
    """Return a Chroma vector store, reusing a persisted one when present.

    If ``persist_directory`` is an existing, non-empty directory it is opened
    as-is and ``pdf_path`` is NOT read. Otherwise the PDF is loaded, split
    into chunks, embedded, and written to ``persist_directory``.

    NOTE: because an existing store is reused without looking at
    ``pdf_path``, indexing a different PDF requires a different
    ``persist_directory`` (or removing the old one first).

    Args:
        pdf_path: PDF to index when no persisted store is found.
        persist_directory: Directory holding the Chroma data. Defaults to
            ``"./chroma_db"``, the location previously hard-coded here.

    Returns:
        A ``Chroma`` instance configured with the embedding model below.
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="BAAI/bge-small-en-v1.5",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )

    # A non-empty directory is treated as an already-built index. isdir()
    # (rather than exists()) keeps a stray regular file at this path from
    # reaching os.listdir().
    if os.path.isdir(persist_directory) and os.listdir(persist_directory):
        return Chroma(
            persist_directory=persist_directory,
            embedding_function=embeddings,
        )

    # No usable index yet: build one from the PDF and persist it.
    chunks = split_chunks(load_pdf(pdf_path))
    return Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=persist_directory,
    )

# step 4: initialize llm
from langchain_huggingface import HuggingFaceEndpoint
def initialize_llm():
    """Create the ``HuggingFaceEndpoint`` LLM client.

    Returns:
        A ``HuggingFaceEndpoint`` pointing at the
        ``mistralai/Mistral-7B-Instruct-v0.2`` repository.
    """
    model_repo = "mistralai/Mistral-7B-Instruct-v0.2"
    return HuggingFaceEndpoint(repo_id=model_repo)

# step 5: initialize retrieval chain
from langchain.chains import RetrievalQA
def get_rag_chain(vector_store, llm, *, k=3):
    """Build a ``RetrievalQA`` chain over ``vector_store`` using ``llm``.

    Args:
        vector_store: Store whose ``as_retriever`` supplies the retriever.
        llm: Language model passed to ``RetrievalQA.from_chain_type``.
        k: Value forwarded to the retriever as ``search_kwargs["k"]``.
            Defaults to 3, the value previously hard-coded here.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type``.
    """
    retriever = vector_store.as_retriever(search_kwargs={"k": k})
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

In [None]:
# Build the vector store for the guide PDF, or reopen the persisted one if
# the store directory already has content.
vector_store = get_vector_store("Trigger_Developer_Guide.pdf")

In [None]:
# Create the LLM client that will be handed to the RAG chain.
llm = initialize_llm()

In [None]:
# Combine the vector store's retriever and the LLM into one QA chain.
rag_chain = get_rag_chain(vector_store, llm)

In [None]:
# Ask a sample question; the question is passed under the "query" key.
rag_chain.invoke({"query": "what is a trigger?"})