In [None]:
1. Read the documentation
2. Split the documentation
    * May not generate a response if the token limit is exceeded
    * If the document is lengthy (or input is long), response generation may take a long time
3. Embed and store in a vector database
4. When a question arises, perform similarity search in the vector database
5. Pass the retrieved document, obtained via similarity search, along with the question to the LLM

In [1]:
%pip install -qU langchain-community pypdf


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [5]:
%pip install -qU langchain-text-splitters


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [6]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
)

file_path = "./design_chapter1.pdf"
loader = PyPDFLoader(file_path)
document_list = loader.load_and_split(text_splitter=text_splitter)

In [8]:
len(document_list)

41

In [9]:
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings

load_dotenv()

embedding = OpenAIEmbeddings(model='text-embedding-3-large')

In [10]:
%pip install langchain-chroma

Collecting langchain-chroma
  Downloading langchain_chroma-0.2.5-py3-none-any.whl.metadata (1.1 kB)
Collecting chromadb>=1.0.9 (from langchain-chroma)
  Downloading chromadb-1.0.15-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.0 kB)
Collecting build>=1.0.3 (from chromadb>=1.0.9->langchain-chroma)
  Downloading build-1.3.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pybase64>=1.4.1 (from chromadb>=1.0.9->langchain-chroma)
  Downloading pybase64-1.4.2-cp313-cp313-macosx_11_0_arm64.whl.metadata (8.7 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb>=1.0.9->langchain-chroma)
  Downloading uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb>=1.0.9->langchain-chroma)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb>=1.0.9->langchain-chroma)
  Downloading onnxruntime-1.22.1-cp313-cp313-macosx_13_0_universal2.whl.metadata (4.6 kB)
Collecting opentelemetry-api>=1.2.0 

In [21]:
from langchain_chroma import Chroma

database = Chroma.from_documents(documents=document_list, embedding=embedding, collection_name='chroma-design', persist_directory="./chroma")

In [None]:
query='What is CDN?'
retrieved_docs = database.similarity_search(query, k=3)

In [15]:
retrieved_docs

[Document(id='6d4aa2a9-08d9-4175-bd3c-210e0e2821b6', metadata={'producer': 'PyPDF2', 'creationdate': '', 'page': 15, 'creator': 'PyPDF', 'page_label': '16', 'source': './design_chapter1.pdf', 'total_pages': 32}, page_content='Content delivery network (CDN) \nA CDN is a network of geographically dispersed servers used to deliver static content. CDN\nservers cache static content like images, videos, CSS, JavaScript files, etc.\nDynamic content caching is a relatively new concept and beyond the scope of this book. It\nenables the caching of HTML pages that are based on request path, query strings, cookies,\nand request headers. Refer to the article mentioned in reference material [9] for more about\nthis. This book focuses on how to use CDN to cache static content.\nHere is how CDN works at the high-level: when a user visits a website, a CDN server closest\nto the user will deliver static content. Intuitively, the further users are from CDN servers, the\nslower the website loads. For exam

In [16]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model='gpt-4o')

In [None]:
prompt = f"""[Identity]
- You are the best System Design Architect
- Please answer to the question based on the [Context]

[Context]
{retrieved_docs}

Question: {query}
"""

In [19]:
ai_message = llm.invoke(prompt)

In [20]:
ai_message.content

'CDN(콘텐츠 전송 네트워크)은 지리적으로 분산된 서버들의 네트워크로, 주로 정적 콘텐츠를 전달하는 데 사용됩니다. CDN 서버는 이미지, 비디오, CSS, JavaScript 파일 등과 같은 정적 콘텐츠를 캐시합니다. 사용자가 웹사이트에 접속할 때, 사용자의 가장 가까운 CDN 서버가 해당 정적 콘텐츠를 제공합니다. 사용자가 CDN 서버와 멀리 떨어져 있을수록 웹사이트 로드 속도가 느려지는 경향이 있습니다. CDN의 주요 기능은 웹페이지의 로드 시간을 개선하는 것이며, 이는 사용자 경험을 향상시키고 서버 부하를 줄이는 데 도움을 줍니다.'

In [None]:
Retrieval QA Chain


In [23]:
%pip install -U langchain langchainhub

Collecting langchainhub
  Downloading langchainhub-0.1.21-py3-none-any.whl.metadata (659 bytes)
Collecting packaging>=23.2 (from langchain-core<1.0.0,>=0.3.72->langchain)
  Downloading packaging-24.2-py3-none-any.whl.metadata (3.2 kB)
Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)
  Downloading types_requests-2.32.4.20250611-py3-none-any.whl.metadata (2.1 kB)
Downloading langchainhub-0.1.21-py3-none-any.whl (5.2 kB)
Downloading packaging-24.2-py3-none-any.whl (65 kB)
Downloading types_requests-2.32.4.20250611-py3-none-any.whl (20 kB)
Installing collected packages: types-requests, packaging, langchainhub
[2K  Attempting uninstall: packaging
[2K    Found existing installation: packaging 25.0
[2K    Uninstalling packaging-25.0:
[2K      Successfully uninstalled packaging-25.0
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [langchainhub]
[1A[2KSuccessfully installed langchainhub-0.1.21 packaging-24.2 types-requests-2.32.4.20250611

[1m[[0m[

In [24]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")



In [25]:
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [26]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=database.as_retriever(),
    chain_type_kwargs={"prompt": prompt}
)

In [28]:
ai_message = qa_chain({"query": query})

  ai_message = qa_chain({"query": query})


In [29]:
ai_message

{'query': 'CDN이 뭔가요?',
 'result': 'CDN은 지리적으로 분산된 서버들의 네트워크로, 정적 콘텐츠(예: 이미지, 비디오, CSS, JavaScript 파일 등)를 제공하는 데 사용됩니다. 사용자와 가장 가까운 CDN 서버가 콘텐츠를 제공하여 웹사이트의 로드 시간을 개선합니다. CDN은 비용이나 캐시 만료 시간 설정 등 여러 고려사항도 필요합니다.'}