# 📝 RAGPipeline: ChatNVIDIA + NVIDIAEmbeddings + ChromaDB
This notebook defines a reusable Python class for building RAG applications.

In [None]:
# Install required libraries (if not already installed)
!pip install -q langchain-community langchain-chroma langchain-nvidia-ai-endpoints

In [None]:
# Colab only: copy the NVIDIA API key from Colab's secret store into the
# process environment, where the NVIDIA LangChain clients look for it.
import os

try:
    from google.colab import userdata
except ImportError:
    # Not running on Colab: leave the environment untouched so the notebook
    # still survives "Restart & Run All" on a local machine.
    userdata = None

if userdata is not None:
    os.environ['NVIDIA_API_KEY'] = userdata.get('NVIDIA_API_KEY')

In [None]:
# Local (non-Colab) setup: read NVIDIA_API_KEY from a .env file.
# Skip this cell if you access a local NIM on your own server.
import os

from dotenv import find_dotenv, load_dotenv

# find_dotenv() locates the .env file; load_dotenv() reads it.
_ = load_dotenv(find_dotenv())

# Falls back to the placeholder "no-pass" when the variable is not set.
apikey = os.environ.get('NVIDIA_API_KEY', "no-pass")

## 📂 Setup Imports and Define RAGPipeline Class

In [None]:
import hashlib
import os

from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_community.vectorstores import Chroma
from langchain_chroma import Chroma
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain.chains import RetrievalQA

# --- Configuration ---
# Chat model: endpoint URL and model name passed to ChatNVIDIA.
LLM_MODEL = "meta/llama-3.2-3b-instruct"
LLM_ENDPOINT = "https://integrate.api.nvidia.com/v1"

# Embedding model: endpoint URL and model name passed to NVIDIAEmbeddings.
EMBEDDING_MODEL = "nvidia/llama-3.2-nv-embedqa-1b-v2"
EMBEDDING_ENDPOINT = "https://integrate.api.nvidia.com/v1"

class RAGPipeline:
    """Question answering over a single text document backed by a Chroma store.

    The document is split into chunks, embedded with NVIDIAEmbeddings and
    persisted in ``chroma_dir``. An MD5 hash of the document is kept in
    ``hash_file`` so the store is only rebuilt when the document changes.

    Args:
        document_path: Path of the text file to index.
        chroma_dir: Directory in which Chroma persists the vector store.
        hash_file: File that stores the MD5 hex digest of the indexed document.
        embedding_model: Model name passed to NVIDIAEmbeddings.
        chat_model: Model name passed to ChatNVIDIA.
        chunk_size: ``chunk_size`` given to RecursiveCharacterTextSplitter.
        chunk_overlap: ``chunk_overlap`` given to RecursiveCharacterTextSplitter.
    """

    def __init__(self, document_path, chroma_dir, hash_file, embedding_model=EMBEDDING_MODEL, chat_model=LLM_MODEL, chunk_size=500, chunk_overlap=50):
        self.document_path = document_path
        self.chroma_dir = chroma_dir
        self.hash_file = hash_file
        self.embedding_model = embedding_model
        self.chat_model = chat_model
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        # Both are created lazily by build_vector_store() / setup_qa_chain().
        self.vector_db = None
        self.qa_chain = None

    def compute_md5(self):
        """Return the MD5 hex digest of the document, read in 4096-byte chunks."""
        hash_md5 = hashlib.md5()
        with open(self.document_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def document_changed(self):
        """Compare the document's hash with the saved one and record the new hash.

        Returns:
            False when ``hash_file`` exists and holds the current hash;
            True otherwise, after writing the current hash to ``hash_file``.
        """
        current_hash = self.compute_md5()
        if os.path.exists(self.hash_file):
            with open(self.hash_file, "r") as f:
                saved_hash = f.read().strip()
            if current_hash == saved_hash:
                print("✅ Document has not changed. Using existing vector store.")
                return False
        with open(self.hash_file, "w") as f:
            f.write(current_hash)
        print("🔄 Document changed or first run. Rebuilding vector store...")
        return True

    def _discard_saved_hash(self):
        """Delete ``hash_file`` so the next build is treated as a first run."""
        try:
            os.remove(self.hash_file)
        except FileNotFoundError:
            # Nothing was recorded, so there is nothing to roll back.
            pass

    def build_vector_store(self):
        """Create or reopen the persisted Chroma store and set ``self.vector_db``.

        The store is rebuilt when the document hash changed or when
        ``chroma_dir`` does not exist; otherwise the persisted store is reopened.
        On a rebuild, the existing collection is deleted first and
        ``self.qa_chain`` is reset to None, because a chain created earlier
        would still point at the deleted collection.

        Raises:
            Any exception raised while loading, splitting or embedding the
            document; the saved hash is removed before it propagates, so the
            next call rebuilds instead of reusing an incomplete store.
        """
        # Created before document_changed() so that a client construction
        # failure cannot leave a freshly written hash behind.
        embedding_function = NVIDIAEmbeddings(base_url=EMBEDDING_ENDPOINT, model=self.embedding_model)

        needs_rebuild = self.document_changed()
        if not needs_rebuild and not os.path.isdir(self.chroma_dir):
            # The hash matches but there is no persisted store to reuse.
            print("🔄 Vector store directory not found. Rebuilding vector store...")
            needs_rebuild = True

        if not needs_rebuild:
            self.vector_db = Chroma(
                persist_directory=self.chroma_dir,
                embedding_function=embedding_function
            )
            return

        try:
            loader = TextLoader(self.document_path)
            documents = loader.load()
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=self.chunk_size,
                chunk_overlap=self.chunk_overlap
            )
            docs = text_splitter.split_documents(documents)

            if os.path.isdir(self.chroma_dir):
                # from_documents() adds to whatever is already persisted, so
                # drop the old collection first; otherwise chunks from the
                # previous document version would still be retrieved.
                Chroma(
                    persist_directory=self.chroma_dir,
                    embedding_function=embedding_function
                ).delete_collection()

            self.vector_db = Chroma.from_documents(
                documents=docs,
                embedding=embedding_function,
                persist_directory=self.chroma_dir
            )
            self.qa_chain = None
        except BaseException:
            # document_changed() already stored the new hash; undo that so an
            # interrupted or failed build is retried on the next call.
            self._discard_saved_hash()
            raise

    def setup_qa_chain(self):
        """Build the RetrievalQA chain, creating the vector store first if needed.

        The retriever is created with ``search_kwargs={"k": 3}`` and the chat
        model with ``temperature=0``.
        """
        if self.vector_db is None:
            self.build_vector_store()
        retriever = self.vector_db.as_retriever(search_kwargs={"k": 3})

        llm = ChatNVIDIA(base_url=LLM_ENDPOINT, model=self.chat_model, temperature=0)
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            return_source_documents=True
        )

    def ask(self, query):
        """Run ``query`` through the QA chain, print the answer and its sources.

        Args:
            query: The question to answer.

        Returns:
            The chain output; its "result" and "source_documents" entries are
            the ones printed here.
        """
        if self.qa_chain is None:
            self.setup_qa_chain()
        # invoke() replaces the deprecated direct call `self.qa_chain(query)`.
        result = self.qa_chain.invoke({"query": query})
        print("\n✅ Answer:", result["result"])
        print("\n📄 Sources:")
        for doc in result["source_documents"]:
            print("-", doc.metadata.get("source", "Unknown"))
        return result

## 🚀 Initialize and Use RAGPipeline

In [None]:
# download sample data
!git clone https://github.com/manote101/Building-Apps-with-NIM.git

In [None]:
# Create the pipeline for the sample document from the cloned repository.
# The vector store and the QA chain are built lazily on the first ask() call.
pipeline = RAGPipeline(
    "Building-Apps-with-NIM/data/doc1.txt",
    hash_file="Building-Apps-with-NIM/data/doc_hash.txt",
    chroma_dir="./chroma_db",
)

# First question (English); ask() prints the answer and its sources.
pipeline.ask(
    "Are there any service providers/ISVs who already implemented Nemo Microservices?"
)

In [None]:
# Same kind of question asked in Thai (roughly: "Who is using Nemo microservices?").
# Reuses the `pipeline` object created in the previous cell.
pipeline.ask("มีใครใช้ Nemo microservices บ้าง")