# 02. Chroma 기반 RAG - 주석 포함 버전

In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) before reading settings.
load_dotenv()

# Directory used for Chroma's on-disk store; defaults to ./chroma_store
# when the CHROMA_PATH environment variable is not set.
CHROMA_PATH = os.environ.get("CHROMA_PATH", "./chroma_store")


In [2]:
# Notebook shell command: install/upgrade LlamaIndex and its Chroma
# vector-store integration.
# NOTE(review): the captured output below reports resolver conflicts with
# already-installed llama-index-agent-openai / multi-modal-llms-openai /
# program-openai packages, which require llama-index-core < 0.13.
!pip install -U llama-index llama-index-vector-stores-chroma

Collecting llama-index
  Using cached llama_index-0.14.0-py3-none-any.whl.metadata (12 kB)
Collecting llama-index-cli<0.6,>=0.5.0 (from llama-index)
  Using cached llama_index_cli-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Collecting llama-index-core<0.15,>=0.13.6 (from llama-index)
  Using cached llama_index_core-0.14.1-py3-none-any.whl.metadata (2.5 kB)
Collecting llama-index-embeddings-openai<0.6,>=0.5.0 (from llama-index)
  Using cached llama_index_embeddings_openai-0.5.1-py3-none-any.whl.metadata (400 bytes)
Collecting llama-index-llms-openai<0.6,>=0.5.0 (from llama-index)
  Using cached llama_index_llms_openai-0.5.6-py3-none-any.whl.metadata (3.0 kB)
Collecting llama-index-readers-file<0.6,>=0.5.0 (from llama-index)
  Using cached llama_index_readers_file-0.5.4-py3-none-any.whl.metadata (5.7 kB)
Collecting llama-index-workflows<3,>=2 (from llama-index-core<0.15,>=0.13.6->llama-index)
  Using cached llama_index_workflows-2.1.0-py3-none-any.whl.metadata (6.4 kB)
INFO: pip is looking 

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llama-index-agent-openai 0.4.12 requires llama-index-core<0.13,>=0.12.41, but you have llama-index-core 0.14.1 which is incompatible.
llama-index-agent-openai 0.4.12 requires llama-index-llms-openai<0.5,>=0.4.0, but you have llama-index-llms-openai 0.5.6 which is incompatible.
llama-index-multi-modal-llms-openai 0.5.3 requires llama-index-core<0.13,>=0.12.47, but you have llama-index-core 0.14.1 which is incompatible.
llama-index-multi-modal-llms-openai 0.5.3 requires llama-index-llms-openai<0.5,>=0.4.0, but you have llama-index-llms-openai 0.5.6 which is incompatible.
llama-index-program-openai 0.3.2 requires llama-index-core<0.13,>=0.12.0, but you have llama-index-core 0.14.1 which is incompatible.
llama-index-program-openai 0.3.2 requires llama-index-llms-openai<0.5,>=0.4.0, but you have llama-index-llms-openai

In [3]:
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader
import chromadb

# Embedding model used to turn text (document chunks and queries) into vectors.
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Persistent Chroma client: data is kept on disk under CHROMA_PATH.
client = chromadb.PersistentClient(path=CHROMA_PATH)

# Fetch the "workshop" collection, creating it if it does not exist yet.
# A collection is the unit that groups related vectors together.
collection = client.get_or_create_collection("workshop")

# Wrap the Chroma collection as a vector store that LlamaIndex can use.
vector_store = ChromaVectorStore(chroma_collection=collection)

# Storage context backed by the vector store created above.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

if collection.count() == 0:
    # Empty collection: load the source documents and build the index.
    # Each document is split into chunks, each chunk is embedded with
    # embed_model, and the resulting vectors are stored in Chroma.
    docs = SimpleDirectoryReader('./data/txt').load_data()
    index = VectorStoreIndex.from_documents(
        docs,
        storage_context=storage_context,
        embed_model=embed_model,
    )
else:
    # The collection is persisted on disk, so re-running this cell must not
    # ingest the same files again: that would store duplicate chunks, which
    # then crowd the top-k retrieval results and repeat embedding API calls.
    # Attach to the vectors that are already stored instead.
    # To re-ingest after changing ./data/txt, drop the collection first,
    # e.g. client.delete_collection("workshop").
    index = VectorStoreIndex.from_vector_store(
        vector_store,
        embed_model=embed_model,
    )

# similarity_top_k=5: retrieve the five most similar chunks as context.
qe = index.as_query_engine(similarity_top_k=5)
print(qe.query('핵심 용어 3개를 뽑아 설명해줘'))


2025-09-13 01:06:09,333 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-09-13 01:06:10,157 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-09-13 01:06:12,551 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Document/Node: 데이터의 기본 단위로, LlamaIndex에서 사용되는 정보의 최소 단위입니다.  
Index: 검색을 위한 데이터 구조로, LlamaIndex에서 문서를 효율적으로 저장하고 검색하기 위해 사용됩니다.  
Retriever: 관련 정보를 찾는 검색 엔진으로, LlamaIndex에서 사용자의 질문에 대한 정보를 검색하는 역할을 합니다.


In [4]:
# eos