In [1]:
!pip install -e ../
import os

if os.path.exists("my_collection.npz"):
    os.remove("my_collection.npz")

import numpy as np
import vlite
from vlite.main import VLite
from vlite.utils import process_pdf
import os

import importlib
importlib.reload(vlite)

# Create a new VLite instance
vlite = VLite("my_collection")

# Add a single text to the collection
text1 = "This is the first text."
metadata1 = {"source": "example1", "tags": ["text", "example"]}
vlite.add(text1, metadata=metadata1)

# Add multiple texts to the collection
texts = [
    "This is the second text.",
    "This is the third text.",
    "This is the fourth text."
]
metadata2 = {"source": "example2", "tags": ["text", "example"]}
vlite.add(texts, metadata=metadata2)

# Add a text with a specific ID
text3 = "This is the fifth text."
metadata3 = {"id":"custom_id", "source": "example3", "tags": ["text", "example"]}
vlite.add(text3, metadata=metadata3)

# Retrieve similar texts based on a query
query = "What is the text about?"
similar_texts, scores, metadata = vlite.retrieve(query, top_k=3)
print("Similar texts:")
for text, score, meta in zip(similar_texts, scores, metadata):
    print(f"Text: {text}")
    print(f"Score: {score}")
    print(f"Metadata: {meta}")
    print()

# Retrieve a text by ID
text_id = "custom_id"
vlite.add("This is the fifth text.", metadata={"id": text_id, "source": "example3"})
text_metadata = vlite.get(ids=[text_id])

print(f"Text with ID '{text_id}':")
print(f"Metadata: {text_metadata}")

# Update a text in the collection
vlite.update("custom_id", text="This is the updated fifth text.", metadata={"source": "updated"})

# Delete a text from the collection
vlite.delete("custom_id")

# Get texts based on IDs
ids = [0, 1]
texts_by_ids = vlite.get(ids=ids)
print(f"Texts with IDs {ids}:")
for text, meta in texts_by_ids:
    print(f"Text: {text}")
    print(f"Metadata: {meta}")
    print()

# Get texts based on metadata
metadata_filter = {"source": "example2"}
texts_by_metadata = vlite.get(where=metadata_filter)
print(f"Texts with metadata {metadata_filter}:")
for text, meta in texts_by_metadata:
    print(f"Text: {text}")
    print(f"Metadata: {meta}")
    print()

# Set metadata for a text
vlite.set(0, metadata={"updated": True})

# Set text content for a text
vlite.set(1, text="This is the updated second text.")

# Set vector for a text
new_vector = np.random.rand(vlite.model.dimension)
vlite.set(2, vector=new_vector)

# Get the count of texts in the collection
count = vlite.count()
print(f"Total texts in the collection: {count}")

# Get information about the collection
vlite.info()

# Clear the entire collection
vlite.clear()

# Process a PDF and add its contents to the collection
pdf_path = "data/gpt-4.pdf"
pdf_texts = process_pdf(pdf_path)
vlite.add(pdf_texts)

# Dump the collection data
collection_data = vlite.dump()
print("Collection data:")
print(collection_data)

Obtaining file:///Users/sdan/Developer/vlite
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: vlite
  Attempting uninstall: vlite
    Found existing installation: vlite 1.1.1
    Uninstalling vlite-1.1.1:
      Successfully uninstalled vlite-1.1.1
  Running setup.py develop for vlite
Successfully installed vlite-1.1.1


  from .autonotebook import tqdm as notebook_tqdm


Collection file my_collection.npz not found. Initializing empty attributes.
Adding text to the collection...
Saving collection to my_collection.npz
Collection saved successfully.
Text added successfully.
Adding text to the collection...
Saving collection to my_collection.npz
Collection saved successfully.
Text added successfully.
Adding text to the collection...
Saving collection to my_collection.npz
Collection saved successfully.
Text added successfully.
Retrieving similar texts...
Retrieving top 3 similar texts for query: What is the text about?
Retrieval completed.
Similar texts:
Text: This is the first text.
Score: This is the second text.
Metadata: This is the third text.

Text: 0.805891860341501
Score: 0.7846782314018346
Metadata: 0.7761037985264687

Text: {'source': 'example1', 'tags': ['text', 'example'], 'id': '38fbc914-3433-4035-8841-3d285c88d2e6'}
Score: {'source': 'example2', 'tags': ['text', 'example'], 'id': 'd3419579-3397-45d0-8a1b-256aee6e2933'}
Metadata: {'source': 'ex

AttributeError: 'int' object has no attribute 'get'