In [None]:
from chroma import Collection

# Build the collection object that the later cells in this notebook also use.
# NOTE(review): the calls in this cell (`Collection(name)`, `add_document`,
# `query(..., query=...)`, `get(id)`) do not appear to match the public
# `chromadb` client API — confirm which `chroma` package is being targeted.
collection = Collection("my_collection")

# Placeholder records (replace with your actual data). Each record holds an
# id, an abstract, and a three-value embedding.
articles = [
    {"id": "article1",
     "abstract": "This is the abstract of article 1.",
     "embedding": [0.1, 0.2, 0.3]},
    {"id": "article2",
     "abstract": "This is the abstract of article 2.",
     "embedding": [0.4, 0.5, 0.6]},
    # ... more articles
]

# Store every abstract, tagging it with the article id as metadata.
for article in articles:
    article_meta = {"id": article["id"]}

    # Option 1 (active): pass only the text. Presumably the library derives
    # the embedding itself when none is supplied — confirm.
    collection.add_document(article["abstract"], metadata=article_meta)

    # Option 2 (disabled): pass the pre-generated embedding along with the text.
    # collection.add_document(article["abstract"], embedding=article["embedding"], metadata=article_meta)

# Similarity search, supplying both a query vector and a query text.
# NOTE(review): with Option 1 active the stored vectors are presumably produced
# by the library, while the query vector below is hand-written — confirm that
# both live in the same embedding space / dimensionality.
query_abstract = "This is a query abstract."
query_embedding = [0.7, 0.8, 0.9]
results = collection.query(query_embedding, query=query_abstract)

# Report the id held in each hit's metadata.
for result in results:
    print(f"Similar article ID: {result.metadata['id']}")

# Direct lookup by article id. Nothing is printed when the returned value is
# falsy.
# NOTE(review): the id was only ever passed as metadata above, so this relies
# on `get` resolving that value as a lookup key — confirm.
article_id = "article2"
result = collection.get(article_id)

if result:
    print(f"Retrieved article abstract: {result.text}")
    print(f"Retrieved article embedding: {result.embedding}")
    print(f"Retrieved article ID: {result.metadata['id']}")


## Persist to JSON

In [None]:
# Round-trip through JSON: write `collection` to a file, then build a second
# collection object from that same file.
# NOTE(review): `export_to_json` / `Collection.from_json` do not appear in the
# public `chromadb` API — confirm they exist in the `chroma` package in use.
json_path = "my_collection.json"
collection.export_to_json(json_path)

# Load from the file written just above.
new_collection = Collection.from_json(json_path)


## Persist to LMDB

In [None]:
# Round-trip through LMDB: write `collection` to an LMDB path, then build a
# second collection object from that same path.
# NOTE(review): `export_to_lmdb` / `Collection.from_lmdb` do not appear in the
# public `chromadb` API — confirm they exist in the `chroma` package in use.
lmdb_path = "my_collection.lmdb"
collection.export_to_lmdb(lmdb_path)

# Load from the path written just above. This rebinds `new_collection`, so any
# object previously stored under that name is replaced.
new_collection = Collection.from_lmdb(lmdb_path)
