In [3]:
import os
import chromadb
from keys import huggingface_api

In [5]:
from chromadb.utils.embedding_functions import HuggingFaceEmbeddingFunction

In [7]:
embeddings= HuggingFaceEmbeddingFunction(api_key= huggingface_api)

In [9]:
cur_dir= os.getcwd()
data_path= os.path.join(cur_dir, "chromadb")
client= chromadb.PersistentClient(path= data_path)

In [11]:
collection= client.create_collection(name= "temp-collection", embedding_function= embeddings)

In [21]:
collection1= client.create_collection(name= "temp1-collection", embedding_function= embeddings)

In [23]:
collections= client.list_collections()
print(collections)

[<chromadb.api.models.Collection.Collection object at 0x00000286D8795A50>, <chromadb.api.models.Collection.Collection object at 0x00000286D87432D0>]


In [25]:
collection= client.get_collection(name= "temp-collection", embedding_function= embeddings)

In [29]:
collection.add(ids= ["id1","id2"], documents= ["this is document 1", "this is document 2"])

In [31]:
collection.count()

2

In [33]:
collection.get(ids= ["id2"])

{'ids': ['id2'],
 'embeddings': None,
 'metadatas': [None],
 'documents': ['this is document 2'],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents']}

In [35]:
collection.get(ids= ["id2", "id1"])

{'ids': ['id1', 'id2'],
 'embeddings': None,
 'metadatas': [None, None],
 'documents': ['this is document 1', 'this is document 2'],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents']}

In [37]:
id_text= "id"
text= "this is text"

for i in range(0,12):
    collection.add(ids= [id_text+str(i)], documents= [text+str(i)])

Add of existing embedding ID: id1
Insert of existing embedding ID: id1
Add of existing embedding ID: id2
Insert of existing embedding ID: id2


In [39]:
collection.get(ids= ["id1", "id3"])

{'ids': ['id1', 'id3'],
 'embeddings': None,
 'metadatas': [None, None],
 'documents': ['this is document 1', 'this is text3'],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents']}

In [41]:
collection.upsert(ids= ["id1"], documents= ["new content"])

In [43]:
collection.get(ids= ["id1", "id3"])

{'ids': ['id1', 'id3'],
 'embeddings': None,
 'metadatas': [None, None],
 'documents': ['new content', 'this is text3'],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents']}

In [163]:
collections= client.list_collections()
for i in collections:
    print(i.name)

temp-collection
temp1-collection


In [165]:
fruits= client.create_collection(name= "fruit-data", embedding_function= embeddings)

In [167]:
documents= ["Apples are the most consumed fruit in the US", \
            "Oranges are a great source of vitamin C.",
            "Grapes come in over 8,000 varieties worldwide.",
            "Strawberries are the only fruit with seeds on the outside.",
            "Peaches are drupes, or stone fruits, related to cherries and almonds.",
            "Lemons are used for their juice, zest, and essential oil.", 
            "Cherries are the smallest fruits found on trees.", 
            "Pineapples are actually a collection of many flowers.",
            "Mangoes are the national fruit of India.",
            "Kiwis are native to China, despite their New Zealand namesake."]

In [169]:
metadata= []
for i in documents:
    data= {}
    data["context"]= i.split()[0]
    metadata.append(data)

In [177]:
fruits.add(ids= [str(i) for i in range(1,11)], documents= documents, metadatas= metadata)

In [179]:
fruits.get()

{'ids': ['1', '10', '2', '3', '4', '5', '6', '7', '8', '9'],
 'embeddings': None,
 'metadatas': [{'context': 'Apples'},
  {'context': 'Kiwis'},
  {'context': 'Oranges'},
  {'context': 'Grapes'},
  {'context': 'Strawberries'},
  {'context': 'Peaches'},
  {'context': 'Lemons'},
  {'context': 'Cherries'},
  {'context': 'Pineapples'},
  {'context': 'Mangoes'}],
 'documents': ['Apples are the most consumed fruit in the US',
  'Kiwis are native to China, despite their New Zealand namesake.',
  'Oranges are a great source of vitamin C.',
  'Grapes come in over 8,000 varieties worldwide.',
  'Strawberries are the only fruit with seeds on the outside.',
  'Peaches are drupes, or stone fruits, related to cherries and almonds.',
  'Lemons are used for their juice, zest, and essential oil.',
  'Cherries are the smallest fruits found on trees.',
  'Pineapples are actually a collection of many flowers.',
  'Mangoes are the national fruit of India.'],
 'uris': None,
 'data': None,
 'included': ['meta

In [187]:
results= fruits.query(
    query_texts= ["India is the 7th largest country"],
    n_results= 2
)
results

{'ids': [['9', '10']],
 'distances': [[1.1871251939099179, 1.5962152236937135]],
 'metadatas': [[{'context': 'Mangoes'}, {'context': 'Kiwis'}]],
 'embeddings': None,
 'documents': [['Mangoes are the national fruit of India.',
   'Kiwis are native to China, despite their New Zealand namesake.']],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents', 'distances']}

In [189]:
fruits.name

'fruit-data'

In [191]:
results= fruits.query(
    query_texts= ["Apples are the most consumed fruit in the US"],
    n_results= 2
)
results

{'ids': [['1', '7']],
 'distances': [[3.1502551239860323e-13, 1.1242562254760096]],
 'metadatas': [[{'context': 'Apples'}, {'context': 'Cherries'}]],
 'embeddings': None,
 'documents': [['Apples are the most consumed fruit in the US',
   'Cherries are the smallest fruits found on trees.']],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents', 'distances']}

In [61]:
fruits.count()

10

In [199]:
results= fruits.get(where= {"context": { "$in": ["Apples", "mangoes"]}})

In [201]:
results

{'ids': ['1'],
 'embeddings': None,
 'metadatas': [{'context': 'Apples'}],
 'documents': ['Apples are the most consumed fruit in the US'],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents']}

#### "where" works on metadata, add more keys to the metadata

In [221]:
results= fruits.get(where= {"$and" : [{"context": {"$in": ["Apples", "Oranges"]}}, {"ids": {"$eq": '1'}}]})

In [223]:
results

{'ids': ['1', '2'],
 'embeddings': None,
 'metadatas': [{'context': 'Apples'}, {'context': 'Oranges'}],
 'documents': ['Apples are the most consumed fruit in the US',
  'Oranges are a great source of vitamin C.'],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents']}