## Chroma 
chroma is an AI-native open-source vector database focused on developer productivity. Chroma is licensed under Apache2.0


In [1]:
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from chromadb.config import Settings 

loader = TextLoader("speech.md")
initial_load = loader.load()

In [2]:
splitter = RecursiveCharacterTextSplitter(chunk_size=100,chunk_overlap=10)
docs = splitter.split_documents(initial_load)

In [3]:
docs

[Document(metadata={'source': 'speech.md'}, page_content='Ladies and gentlemen,'),
 Document(metadata={'source': 'speech.md'}, page_content='Today, we stand at the crossroads of innovation and tradition. Technology has transformed the way'),
 Document(metadata={'source': 'speech.md'}, page_content='the way we live, work, and communicate. It is our responsibility to harness this power for the'),
 Document(metadata={'source': 'speech.md'}, page_content='for the greater good.'),
 Document(metadata={'source': 'speech.md'}, page_content="Together, let's build a future that is inclusive, sustainable, and driven by knowledge."),
 Document(metadata={'source': 'speech.md'}, page_content='Thank you.')]

In [4]:
from dotenv import load_dotenv
import os
load_dotenv()
api_key2 = os.getenv("OPENAI_API_KEY")

embeddings = OpenAIEmbeddings(api_key=api_key2,model="text-embedding-3-large",dimensions=1024)

  embeddings = OpenAIEmbeddings(api_key=api_key2,model="text-embedding-3-large",dimensions=1024)
                    dimensions was transferred to model_kwargs.
                    Please confirm that dimensions is what you intended.


In [13]:
from langchain_community.vectorstores import Chroma

vectordb = Chroma(
    collection_name="my_collection",  # A string is required here
    embedding_function=embeddings,
    persist_directory="./chroma_db"  # This saves to disk
)

In [14]:
vectordb.add_documents(documents=docs)
print("Documents added to Chroma database.")

Documents added to Chroma database.


In [15]:
vectordb.persist()
print("Chroma database saved successfully.")

Chroma database saved successfully.


  vectordb.persist()


In [16]:
query = "what transformed the lives of people?"
results = vectordb.similarity_search(query)

In [17]:
results[0].page_content

'the way we live, work, and communicate. It is our responsibility to harness this power for the'

In [18]:
# ✅ Display results
if results:
    print("Top match:", results[0].page_content)
else:
    print("No relevant results found.")

Top match: the way we live, work, and communicate. It is our responsibility to harness this power for the
