In [1]:
!pip install langchain-community pypdf
!pip install -qU langchain-aws
!pip install -qU langchain-chroma
!pip install langchain

Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting pypdf
  Downloading pypdf-6.1.3-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<2.0.0,>=1.0.1 (from langchain-community)
  Downloading langchain_core-1.0.1-py3-none-any.whl.metadata (3.5 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community)
  Downloading langchain_classic-1.0.0-py3-none-any.whl.metadata (3.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.10.1 (from langchain-community)
  Downloading pydantic_settings-2.11.0-py3-none-any.whl.metadata (3.4 kB)
Collecting langsmith<1.0.0,>=0.1.125 (from langchain-community)
  Downloading langsmith-0.4.38-py3-none-any.whl.metadata (14 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.3-py3-none-any.whl.metadata (9.7

## Add the required imports

In [2]:
import os
from typing import List, Optional, Tuple, Union

from langchain.agents import create_agent
from langchain.chat_models import init_chat_model
from langchain.tools import tool
from langchain_aws import BedrockEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.documents import Document
from langchain_core.runnables import chain
from langchain_text_splitters import RecursiveCharacterTextSplitter


## Function to read the PDF content

In [3]:
def load_pdf(file_path):
    """Load the PDF at ``file_path`` with ``PyPDFLoader``.

    Args:
        file_path: Location of the PDF file to read.

    Returns:
        Whatever ``PyPDFLoader(file_path).load()`` returns.
    """
    return PyPDFLoader(file_path).load()

In [4]:
# Load the sample PDF once; the next cell inspects the loader output via `pdf_content`.
pdf_content = load_pdf(file_path='SystemDesignInterview_1.pdf')

Ignoring wrong pointing object 8 0 (offset 0)


In [5]:
# Number of items the loader returned for this PDF.
print(len(pdf_content))

1


In [6]:
def load_pdf_by_splitter(file_path, chunk_size=1000, chunk_overlap=200):
    """Load a PDF and split its content into overlapping chunks.

    Args:
        file_path: Location of the PDF file to read (passed to ``load_pdf``).
        chunk_size: Maximum chunk size handed to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.
        chunk_overlap: Overlap between consecutive chunks handed to
            ``RecursiveCharacterTextSplitter``. Defaults to 200.

    Returns:
        The result of ``split_documents`` applied to the loaded PDF content.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        add_start_index=True,
    )
    return text_splitter.split_documents(load_pdf(file_path))

In [7]:
# Chunked version of the same PDF; these chunks are what gets embedded and stored below.
pdf_content_by_splitter = load_pdf_by_splitter(file_path='SystemDesignInterview_1.pdf')

Ignoring wrong pointing object 8 0 (offset 0)


In [8]:
# Preview the text of the first chunk to sanity-check the split.
pdf_content_by_splitter[0].page_content

'Vertical scaling vs horizontal scaling Vertical scaling, referred to as “scale up” , means the process of adding more power (CPU, RAM, etc.) to your servers. Horizontal scaling, referred to as “scale-out” , allows you to scale by adding more servers into your pool of resources. When traffic is low, vertical scaling is a great option, and the simplicity of vertical scaling is its main advantage. Unfortunately, it comes with serious limitations. • Vertical scaling has a hard limit. It is impossible to add unlimited CPU and memory to a single server. • Vertical scaling does not have failover and redundancy. If one server goes down, the website/app goes down with it completely. Horizontal scaling is more desirable for large scale applications due to the limitations of vertical scaling. In the previous design, users are connected to the web server directly. Users will unable to access the website if the web server is offline. In another scenario, if many users access the web server'

## Generating embeddings

In [9]:
# Embedding client shared by generate_embedding() and the Chroma vector store below.
embedding_model = BedrockEmbeddings(model_id="amazon.titan-embed-text-v2:0")

In [10]:
def generate_embedding(
    documents: Optional[List["Document"]] = None,
) -> List[List[float]]:
    """Embed the text of each document with ``embedding_model``.

    Args:
        documents: Objects exposing ``page_content``. When omitted, the
            notebook-level ``pdf_content_by_splitter`` chunks are used, which
            matches the original zero-argument behaviour.

    Returns:
        One embedding vector per document, in input order. An empty input
        yields an empty list without calling the embedding service.
    """
    if documents is None:
        documents = pdf_content_by_splitter

    texts = [document.page_content for document in documents]
    if not texts:
        # Avoid a pointless (and billable) service call for empty input.
        return []

    # embed_documents is the document-side API of the embeddings interface;
    # embed_query is meant for the search text, not for the corpus.
    return embedding_model.embed_documents(texts)

In [11]:
# I am using an AI sandbox to test my learning; due to usage constraints I should not generate more tokens, so the call below stays commented out.
#len(generate_embedding())

## Store embeddings in the vector DB

In [19]:
def store_embedding(
    persist_dir: str = "./chroma_system_design_db_4",
    collection_name: str = "system_design_collection",
    documents: Optional[List["Document"]] = None,
):
    """Open the Chroma vector store, populating it on first use.

    If ``persist_dir`` already contains data, the store is reopened and no
    documents are added, so nothing is re-embedded. Otherwise the store is
    created and ``documents`` are added to it.

    Args:
        persist_dir: Directory in which Chroma keeps its data.
        collection_name: Name of the Chroma collection.
        documents: Documents to index when the store is new. Defaults to the
            notebook-level ``pdf_content_by_splitter`` chunks.

    Returns:
        The ``Chroma`` vector store instance.
    """
    # Decide this BEFORE constructing the client, as the original flow did:
    # constructing the store may itself create files in the directory.
    already_populated = False
    if os.path.isdir(persist_dir):
        # The context manager closes the directory iterator deterministically.
        with os.scandir(persist_dir) as entries:
            already_populated = any(entries)

    vector_store = Chroma(
        collection_name=collection_name,
        embedding_function=embedding_model,
        persist_directory=persist_dir,
    )

    if already_populated:
        print(f"Reusing existing vector store at {persist_dir}")
        return vector_store

    if documents is None:
        documents = pdf_content_by_splitter
    vector_store.add_documents(documents)

    # Only call persist() when the installed Chroma wrapper provides it, rather
    # than calling it blindly and swallowing every exception it may raise.
    persist = getattr(vector_store, "persist", None)
    if callable(persist):
        persist()

    print(f"Created and persisted new vector store at {persist_dir}")
    return vector_store

In [20]:
# Build (or reopen) the vector store once; the retrieval cells below read this global.
vector_store = store_embedding()

Created and persisted new vector store at ./chroma_system_design_db_4


## Getting the related documents for the provided query

In [21]:
@chain
def search_query_by_similarty_search(
    query: Union[str, Tuple[str, int]], k: int = 1
) -> List[Document]:
    """Return the ``k`` documents in ``vector_store`` most similar to ``query``.

    The ``@chain`` decorator turns this function into a runnable, and a
    runnable is invoked with a single input value. ``k`` therefore cannot be
    supplied as a second argument through ``invoke``/``batch``; to choose it
    per call, pass the input as a ``(query, k)`` pair instead of a bare string.

    Args:
        query: The search text, or a ``(query, k)`` pair.
        k: Number of documents to return when ``query`` is a bare string.

    Returns:
        The documents returned by ``vector_store.similarity_search``.
    """
    if isinstance(query, (tuple, list)):
        # Unpack the pair form so the tuple itself is never used as search text.
        query, k = query
    return vector_store.similarity_search(query, k=k)

In [22]:
# batch() takes a list of inputs; each result is indexed below to get its first hit.
retriever_results = search_query_by_similarty_search.batch(['what is horizontal scaling'])
for result in retriever_results:
    # result[0] is the first (and, with the chain's default k of 1, only) document.
    print(result[0].page_content)


Vertical scaling vs horizontal scaling Vertical scaling, referred to as “scale up” , means the process of adding more power (CPU, RAM, etc.) to your servers. Horizontal scaling, referred to as “scale-out” , allows you to scale by adding more servers into your pool of resources. When traffic is low, vertical scaling is a great option, and the simplicity of vertical scaling is its main advantage. Unfortunately, it comes with serious limitations. • Vertical scaling has a hard limit. It is impossible to add unlimited CPU and memory to a single server. • Vertical scaling does not have failover and redundancy. If one server goes down, the website/app goes down with it completely. Horizontal scaling is more desirable for large scale applications due to the limitations of vertical scaling. In the previous design, users are connected to the web server directly. Users will unable to access the website if the web server is offline. In another scenario, if many users access the web server


## RAG agents

In [26]:
# Chat model that drives the RAG agent, resolved through the "bedrock_converse" provider.
chat_model = init_chat_model(
    "openai.gpt-oss-120b-1:0",
    model_provider="bedrock_converse",
)

In [51]:
@tool
def search_similar_documents(query: str, k: int = 4) -> List[str]:
    """Return page_content strings for the top-k similar documents.

    Args:
        query: Query string to search.
        k: Number of documents to retrieve (defaults to 4; values below 1
            are treated as 1).

    Returns:
        List[str]: page_content of retrieved documents.
    """
    # Query the vector store directly with separate `query` and `k` arguments.
    # Passing a (query, k) tuple through the single-input runnable would make
    # the tuple itself the search text.
    documents = vector_store.similarity_search(query, k=max(1, k))

    # One string per document. list.extend() with a str would instead add the
    # text one character at a time.
    return [document.page_content for document in documents]

In [None]:
def build_rag_pipeline(query: str):
    """Run a retrieval-grounded agent on ``query`` and print each streamed step.

    An agent is created from ``chat_model`` with ``search_similar_documents``
    as its only tool, then streamed in ``"values"`` mode; the latest message of
    every streamed state is pretty-printed.

    Args:
        query: The user's question.

    Returns:
        None. The conversation is printed as a side effect.
    """
    tools = [search_similar_documents]
    # The prompt must tell the model to retrieve first: "supplied documents"
    # only exist once the tool has been called, and without this instruction
    # the model can answer straight from its pretraining.
    prompt = (
        "Before answering, always call the `search_similar_documents` tool to "
        "retrieve passages relevant to the user's question. "
        "Only answer using the supplied conversation and documents. "
        "Do not use or invent information from your pretraining. "
        "If the content does not contain the answer, respond I don't know."
    )
    agent = create_agent(chat_model, tools, system_prompt=prompt)
    for event in agent.stream(
        {"messages": [{"role": "user", "content": query}]},
        stream_mode="values",
    ):
        # In "values" mode each event carries the full state; show only the newest message.
        event["messages"][-1].pretty_print()


In [56]:
# NOTE(review): the recorded output for this query answers from the model's own
# knowledge rather than replying "I don't know" — confirm the agent actually
# consults the retrieval tool.
build_rag_pipeline("What is Nitya")

********************What is Nitya

What is Nitya

[{'type': 'reasoning_content', 'reasoning_content': {'text': 'The user asks: "What is Nitya". Likely they want definition. Could be a name, Indian term meaning "eternal", also a brand or company. Need to answer succinctly. Provide meaning. Let\'s answer: Nitya is a Sanskrit word meaning "eternal", "permanent", also used as a name, also a company named Nitya. Provide context.', 'signature': ''}}, {'type': 'text', 'text': '**Nitya** (नीत्य) is a Sanskrit word that means “eternal,” “permanent,” or “ever‑lasting.” It is commonly used in Indian philosophical and spiritual texts to describe the timeless nature of reality, the soul, or divine principles.\n\nBecause of its positive connotation, **Nitya** is also used as a personal name for both boys and girls in India and among the Indian diaspora.  \n\nIn addition, “Nitya” appears as a brand name for various companies and products (e.g., Nitya\u202fTech, Nitya\u202fFinance, Nitya\u202fWellness