In [1]:
# --- English corpus and query -------------------------------------------
# Five short sentences on unrelated topics; only one of them is about
# the universe, which is what the query below asks about.
documents_en = [
    "Nature is an integral part of our lives.",
    "Technology is evolving at a rapid pace.",
    "Eating healthy food is important for the body and mind.",
    "The universe is vast and full of mysteries.",
    "Programming languages are tools for building software.",
]

query_en = "What should we know about the cosmos?"

# --- German corpus and query --------------------------------------------
# German counterparts of the English sentences above, in the same order.
documents_de = [
    "Die Natur ist ein integraler Bestandteil unseres Lebens.",
    "Die Technologie entwickelt sich in rasantem Tempo.",
    "Gesunde Ernährung ist wichtig für Körper und Geist.",
    "Das Universum ist weit und voller Geheimnisse.",
    "Programmiersprachen sind Werkzeuge zum Erstellen von Software.",
]

query_de = "Was sollten wir über den Kosmos wissen?"

In [None]:
import chromadb
from chromadb.utils import embedding_functions

# Set up Chroma in-memory for easy prototyping (persistence can be added later).
client = chromadb.Client()

# Sentence-transformer embedding function backed by a multilingual model;
# the notebook later queries the English documents with a German question.
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="ibm-granite/granite-embedding-278m-multilingual"
)

# Drop any leftover "documents" collection BEFORE creating it, so that
# re-running this cell starts from an empty collection. Deleting after
# get_or_create_collection() would leave `collection` pointing at a
# collection that no longer exists, and the add() below would target it.
try:
    client.delete_collection("documents")
except Exception:
    # Best effort: on a fresh client there is nothing to delete.
    # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit
    # still propagate.
    pass

# Create the collection. get_collection, create_collection and
# delete_collection are also available on the client.
collection = client.get_or_create_collection(
    "documents",
    embedding_function=sentence_transformer_ef,
)

# Add docs to the collection; Chroma embeds and indexes them using the
# embedding function configured above.
collection.add(
    documents=documents_en,
    metadatas=[{"source": "array"}] * len(documents_en),  # can be filtered on in queries
    ids=[str(i) for i in range(len(documents_en))],  # must be unique for each doc
)


In [6]:
# Query the collection for the 2 documents most similar to the German
# question. `query_de` (defined in the first cell) is used instead of a
# second hard-coded copy of the same string, so the query text lives in
# exactly one place. Documents can also be fetched by id via .get().
results = collection.query(
    query_texts=[query_de],
    n_results=2,
    # where={"metadata_field": "is_equal_to_this"}, # optional filter
    # where_document={"$contains":"search_string"}  # optional filter
)

# Bare last expression so the notebook renders the result dict.
results

{'ids': [['3', '0']],
 'embeddings': None,
 'documents': [['The universe is vast and full of mysteries.',
   'Nature is an integral part of our lives.']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[{'source': 'array'}, {'source': 'array'}]],
 'distances': [[0.5222079157829285, 0.7057667970657349]]}