# ChromaDB Getting Started

This notebook follows the official Chroma getting-started guide: https://docs.trychroma.com/getting-started

## Install

In [None]:
# Uncomment to install Chroma on first use. `%pip` (rather than `!pip`)
# installs into the environment of the running kernel.
# %pip install chromadb

## Create a Chroma Client

In [1]:
import chromadb
# Client created with no storage path or settings; presumably this keeps all
# data in memory only, so collections are lost when the kernel restarts
# (confirm against the Chroma client documentation).
chroma_client = chromadb.Client()


## Create a collection

Collections are where you'll store your embeddings, documents, and any additional metadata. You can create a collection with a name:

In [2]:
# Use get_or_create_collection so this cell is safe to re-run:
# create_collection raises when a collection named "words" already exists
# on the client, which happens on any second execution of the cell.
collection = chroma_client.get_or_create_collection(name="words")

## Add some text documents to the collection

Chroma will store your text and handle embedding and indexing automatically. You can also customize the embedding model.

In [3]:
# Store four one-word documents, each under its own unique id.
# Only raw text is supplied; Chroma computes the embeddings itself.
collection.add(
    ids=["id1", "id2", "id3", "id4"],
    documents=["apple", "peach", "phone", "music"],
)

/home/bigdata/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [02:21<00:00, 589kiB/s] 


## Query the collection

You can query the collection with a list of query texts, and Chroma will return the n most similar results. It's that easy!

In [4]:
# Rank every stored document (all four) by similarity to "fruit".
results = collection.query(query_texts=["fruit"], n_results=4)

results

{'ids': [['id2', 'id1', 'id3', 'id4']],
 'embeddings': None,
 'documents': [['peach', 'apple', 'phone', 'music']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None, None, None, None]],
 'distances': [[0.6450808644294739,
   0.9255111217498779,
   1.1451698541641235,
   1.280678153038025]]}

In [5]:
# Same "fruit" query, keeping only the single nearest document.
results = collection.query(query_texts=["fruit"], n_results=1)

results

{'ids': [['id2']],
 'embeddings': None,
 'documents': [['peach']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None]],
 'distances': [[0.6450808644294739]]}

In [6]:
# Nearest document to a concept ("art") that is not itself in the collection.
results = collection.query(query_texts=["art"], n_results=1)

results

{'ids': [['id4']],
 'embeddings': None,
 'documents': [['music']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None]],
 'distances': [[0.9536923766136169]]}

In [7]:
# Query with a full sentence instead of a single word.
results = collection.query(query_texts=["i'm feeling hungry"], n_results=1)

results

{'ids': [['id2']],
 'embeddings': None,
 'documents': [['peach']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None]],
 'distances': [[1.534833550453186]]}

In [8]:
# Another sentence-level query; display only the matched document text
# rather than the full result dictionary.
results = collection.query(query_texts=["i would like to talk with someone"], n_results=1)

results["documents"]

[['phone']]

## View the embeddings for a document

In [9]:
# Look up one document by id and explicitly request its embedding via `include`.
document_id = "id1"
result = collection.get(
    ids=[document_id],
    include=["embeddings"],
)
# Only one id was requested, so take the first (and only) entry.
embeddings = result["embeddings"][0]


In [10]:
# Display the embedding vector retrieved for `document_id` (shown as an array).
embeddings

array([-6.13850448e-03,  3.10117602e-02,  6.47936091e-02,  1.09414663e-02,
        5.26720192e-03, -4.74764630e-02,  8.12030360e-02,  2.89809778e-02,
        6.67619333e-02,  3.03004198e-02,  5.74649572e-02, -8.62358045e-03,
        1.32275408e-03,  3.99159238e-04, -1.88430380e-02, -2.57937927e-02,
       -1.30420616e-02, -5.26249781e-02, -5.82925230e-02, -2.58992203e-02,
       -3.33736651e-02,  2.45679412e-02, -5.22656227e-03,  2.30059307e-02,
        3.28607410e-02,  7.50218108e-02,  5.80181414e-03, -1.49586303e-02,
       -2.87529118e-02, -1.18551835e-01, -3.93217169e-02, -5.13875559e-02,
        7.66181946e-02,  4.84036915e-02, -3.02564390e-02, -9.14341584e-02,
        5.11821583e-02, -9.64969490e-03, -2.15108246e-02, -7.17740878e-02,
       -6.32242411e-02, -1.76688489e-02,  2.80814338e-02,  9.00471210e-02,
        1.94179155e-02,  5.45441918e-03,  4.83017750e-02,  8.49087723e-03,
        2.77505443e-02,  9.65605676e-02,  2.63026636e-02, -3.05333398e-02,
       -6.88157380e-02, -