In [None]:
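# Uncomment to install the packages this notebook imports (%pip targets the running kernel's environment):
# %pip install chromadb llama-index llama-index-vector-stores-chroma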

In [20]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

In [21]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext

In [27]:
# Persistent Chroma client: the database is saved to disk at the path below.
# Handy for testing and development, but not recommended for production use.
CHROMA_DB_PATH = r"D:\AI-DATASETS\02-MISC-large\GenAI-LLMs\chromadb\llamaindex-chroma_database-bks"
db = chromadb.PersistentClient(path=CHROMA_DB_PATH)

In [32]:
# Fetch the collection with this name, creating it if it does not exist yet.
COLLECTION_NAME = "my_chroma_store-bks"
chroma_collection = db.get_or_create_collection(COLLECTION_NAME)

In [33]:
# Wrap the Chroma collection in LlamaIndex's ChromaVectorStore adapter.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

In [34]:
# Default StorageContext, except that its vector store is the Chroma-backed one.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [35]:
# Load every file found in the local "files" directory as LlamaIndex documents.
documents = SimpleDirectoryReader("files").load_data()

# The Chroma collection is persisted on disk, so running this cell again would
# embed and insert the same documents a second time and leave duplicate nodes
# in the store. Ingest only while the collection is still empty; otherwise
# attach an index to the vectors that are already there.
# To force a fresh ingestion (e.g. after changing the files), start from an
# empty collection.
if chroma_collection.count() == 0:
    index = VectorStoreIndex.from_documents(
        documents=documents,
        storage_context=storage_context,
    )
else:
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [37]:
# Fetch the contents of the ChromaDB collection (get() is called without any
# filter arguments) and keep the returned value in `results`.
results = chroma_collection.get()
# Bare trailing expression: the notebook renders `results` as the cell output.
results

{'ids': ['d7831a9c-f341-4894-824a-7a361486ece0',
  '894195b1-8638-4604-974d-6a3fc8c7efb9',
  '7637b4d0-672b-4940-9014-2f694db7e9de',
  '0298de2d-15c9-4c5f-b6ed-cb5428c0339d'],
 'embeddings': None,
 'metadatas': [{'_node_content': '{"id_": "d7831a9c-f341-4894-824a-7a361486ece0", "embedding": null, "metadata": {"file_path": "D:\\\\gridflowAI\\\\00-ALL COURSES REPO\\\\Innovative-AI\\\\01-GenAI\\\\06-LLMs\\\\code\\\\05-various LLMs\\\\10-LlamaIndex\\\\01-step wise examples\\\\files\\\\sample_document1.txt", "file_name": "sample_document1.txt", "file_type": "text/plain", "file_size": 612, "creation_date": "2024-09-14", "last_modified_date": "2024-09-07"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "abbafe9e-4c35-4a5a-8947-15363f2c5272", 

In [17]:
# Rebuild the index directly on top of the vectors already stored in Chroma,
# without reading the source files again.
# from_vector_store() creates its own StorageContext around the vector store it
# receives, so passing a separate storage_context is unnecessary.
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

#### Difference between Vector Stores and Vector Databases

**• Vector Store:**
   - Refers to a storage system where vectors are stored.
   - Vectors are high-dimensional representations of data (text, images, audio) used by machine learning models.
   - Focuses on efficient storage, not necessarily on querying or analyzing the vectors.
   - Mainly used to maintain a large repository of vectors that can be retrieved for machine learning tasks.

**• Vector Database:**
   - A more sophisticated system that stores vectors and provides advanced querying and analysis capabilities.
   - Includes the ability to perform similarity searches and other complex operations.
   - Designed to handle the challenges of high-dimensional vector data, offering specialized indexing for efficient search and retrieval.
   - Encompasses both storage and querying, making it ideal for applications requiring fast and accurate searches through large volumes of vectorized data.

**Summary:**
   - A **vector store** focuses on storage, while a **vector database** combines storage with advanced querying functionalities.


Reference: <https://learn.microsoft.com/en-us/semantic-kernel/memories/vector-db>