## Load Embedding Model and Embed the text

In [5]:
from FlagEmbedding import BGEM3FlagModel

model = BGEM3FlagModel('BAAI/bge-m3',  
                       use_fp16=True)

sentence=["What is BGE M3?", "Defination of BM25"]

embedding = model.encode(sentence)['dense_vecs']


  from .autonotebook import tqdm as notebook_tqdm
Fetching 30 files:   0%|          | 0/30 [00:00<?, ?it/s]

## Load Chromadb client and create VectorDb collection

In [18]:
import chromadb


client = chromadb.PersistentClient(path="./VectorDb")

vector_db_name = "vector_db_name"

try:
    client.delete_collection(name=vector_db_name)
    print(f"deleting vector db if exists")
except:
    print(f"{vector_db_name} doesn't exists")

collection = client.create_collection(
      name=vector_db_name,
      metadata={"hnsw:space": "cosine"},
  )

deleting vector db if exists


## Add your data to db

If you don't want to embed, you can leave as text and it will use default embedding model from ChromaDb

In [19]:
## Add element to the Vector Db
for i in range(len(sentence)):
    text_topic_name=str(i)
    collection.add(
            # embeddings = embedding[i],
            documents=sentence[i],
            metadatas=[{"topic": text_topic_name}],
            ids=[f"{text_topic_name}__id"],
        )

## Query your database

Passing just the text will use default embedding model from Chromadb

In [21]:
results = collection.query(
    query_embeddings=[[1,2,34,3]],
    n_results=2,
)
print(results)

In [22]:
results = collection.query(
    query_texts=["what is this?"],
    n_results=2,
)
print(results)

Number of requested results 2 is greater than number of elements in index 1, updating n_results = 1


{'ids': [['0__id']], 'distances': [[0.8066927626105782]], 'metadatas': [[{'topic': '0'}]], 'embeddings': None, 'documents': [['hi']], 'uris': None, 'data': None}


## Host your chroma Db as server

In terminal run this:
`chroma run --path ./VectorDb --port 5000`

This will start your vector db server at local host: 5000

## Query from hosted chroma client

In [15]:
import chromadb
chroma_client = chromadb.HttpClient(host='localhost', port=8000)
collection_http = client.get_collection(name=vector_db_name) 

In [16]:
results = collection_http.query(
    query_embeddings=[[1,2,34,3]],
    n_results=2,
)
print(results)

Number of requested results 2 is greater than number of elements in index 1, updating n_results = 1


{'ids': [['0__id']], 'distances': [[0.0004216299616589536]], 'metadatas': [[{'topic': '0'}]], 'embeddings': None, 'documents': [['hi']], 'uris': None, 'data': None}
