# Simple RAG demo with all the modules
- LangChain document loader
- custom text splitter
- FAISS vector db
- Hugging Face embeddings
- LLM model
- RAG chain using LCEL

In [None]:
from __future__ import annotations

import os

# Load environment variables from a .env file (located via find_dotenv) so that
# the os.getenv lookups in later cells can read them.
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv())

In [None]:
# Read the OpenAI API key from the environment; the value is None when the
# variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
# Resolve the project root (BASE_DIR) with pathlib.
from pathlib import Path

try:
    # Running as a script: __file__ is defined.
    _script_path = Path(__file__).resolve()
except NameError:
    # Running inside a notebook: __file__ does not exist, so fall back to the
    # parent of the current working directory.
    BASE_DIR = Path.cwd().parent
else:
    # Two levels up from this file.
    BASE_DIR = _script_path.parent.parent

In [None]:
# Path of the sample text document that the pipeline ingests.
sample_input_file = BASE_DIR.joinpath("data", "sample_input.txt")

## Data Ingestion Module

In [None]:
# Use LangChain to load the document for further processing.
# TextLoader is imported from langchain_community (the package this file also
# uses for FAISS); the legacy `langchain.document_loaders` path is deprecated.
from langchain_community.document_loaders import TextLoader

# Read the sample file as UTF-8 text; load() returns the loaded documents.
loader = TextLoader(sample_input_file, encoding="utf-8")
documents = loader.load()

In [None]:
# Split the document into manageable chunks using a custom text splitter.
# The configuration is based on the research at
# https://research.trychroma.com/evaluating-chunking
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", "?", "!", " ", ""],
    chunk_size=200,
    chunk_overlap=0,
)

In [None]:
# Break the loaded documents into chunks with the text splitter.
text_chunks = text_splitter.split_documents(documents)
text_chunks[0]  # show the first chunk to verify the chunk structure

In [None]:
# number of chunks generated from the document
print(len(text_chunks))

In [None]:
# import the FAISS vector store and the Hugging Face embedding wrapper,
# and choose the embedding model to use

from langchain_community.vectorstores import FAISS
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# model identifier handed to HuggingFaceEmbeddings when the embedding function is built
model_name = "google/embeddinggemma-300m"

In [None]:
# Embedding function: runs the selected model on the CPU and asks the encoder
# to normalize the embeddings it produces.
hf_embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

### Build the vector index (a custom embedding function can be passed here instead)

In [None]:
# Embed every chunk and index the resulting vectors in a FAISS store.
vector_db = FAISS.from_documents(text_chunks, hf_embeddings)

In [None]:
# Local folder where the vector db is saved for reuse.
vector_db_local = BASE_DIR.joinpath("data")

In [None]:
# Persist the index to disk under the name "vector_db".
vector_db.save_local(vector_db_local, index_name="vector_db")

## Retrieval using cosine-based similarity search

In [None]:
# Ask the vector db for the five chunks most similar to the query and print
# each one followed by a separator line.
query = "What are the main components of a comprehensive Agentic AI testing system?"
relevant_chunks = vector_db.similarity_search(query, k=5)
for idx, doc in enumerate(relevant_chunks):
    # One print call emits the chunk line and then the 50-character rule.
    print(f"Chunk {idx + 1}: {doc.page_content}", "=" * 50, sep="\n")

## Response generation with the help of relevant chunks from vector db

In [None]:
from langchain.prompts import ChatPromptTemplate

# Prompt text for the RAG chain. It has two placeholders: {question} for the
# user's query and {chunks} for the context retrieved from the vector db.
template = """
You are an AI assistant that helps users with their questions.
You are given a question and a set of relevant chunks from vector db.
You need to generate a response that is relevant to the question and the chunks.
If you do not know the answer, just say that you do not know.
Use maximum ten sentences and keep the answer concise.
Question: {question}
Relevant chunks: {chunks}
Response:
"""

In [None]:
# Build the chat prompt instance from the template string.
prompt = ChatPromptTemplate.from_template(template)

In [None]:
# create the output parser used as the final step of the chain to format the response
from langchain.schema.output_parser import StrOutputParser

output_parser = StrOutputParser()

In [None]:
# Set up the chat LLM, authenticated with the API key read from the environment.
from langchain_openai.chat_models import ChatOpenAI

llm_model = ChatOpenAI(
    model="gpt-5-nano",
    api_key=OPENAI_API_KEY,
)

In [None]:
# expose the vector db as a retriever so it can be used as a step in the chain
# NOTE(review): no search arguments are passed, so the retriever uses its default
# number of results, which may differ from the k=5 used in the manual search — confirm.
retriever = vector_db.as_retriever()

In [None]:
# Create the RAG chain using LCEL.
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough


def _format_chunks(docs):
    """Join the text of the retrieved documents into one prompt-ready string.

    Without this step the list of Document objects is interpolated into the
    prompt as its Python repr (metadata included) rather than as plain text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The incoming query fans out to two branches: it is passed through unchanged as
# "question", and it is sent to the retriever whose results are flattened to
# text as "chunks". The resulting dict fills the prompt, which feeds the LLM,
# and the output parser turns the model reply into a plain string.
rag_chain = (
    RunnableParallel({"question": RunnablePassthrough(), "chunks": retriever | _format_chunks})
    | prompt
    | llm_model
    | output_parser
)

In [None]:
rag_chain.invoke(query)