In [1]:
!pip install -e ../
import os

if os.path.exists("my_collection.npz"):
    os.remove("my_collection.npz")

import numpy as np
import vlite
from vlite.main import VLite
from vlite.utils import process_pdf
import os

import importlib
importlib.reload(vlite)

# Create a new VLite instance
vlite = VLite("my_collection")

# Add a single text to the collection
text1 = "This is the first text."
metadata1 = {"source": "example1", "tags": ["text", "example"]}
vlite.add(text1, metadata=metadata1)

# Add multiple texts to the collection
texts = [
    "This is the second text.",
    "This is the third text.",
    "This is the fourth text."
]
metadata2 = {"source": "example2", "tags": ["text", "example"]}
vlite.add(texts, metadata=metadata2)

# Add a text with a specific ID
text3 = "This is the fifth text."
metadata3 = {"id":"custom_id", "source": "example3", "tags": ["text", "example"]}
vlite.add(text3, metadata=metadata3)

# Retrieve similar texts based on a query
query = "What is the text about?"
similar_texts, scores, metadata = vlite.retrieve(query, top_k=3)
print("Similar texts:")
for text, score, meta in zip(similar_texts, scores, metadata):
    print(f"Text: {text}")
    print(f"Score: {score}")
    print(f"Metadata: {meta}")
    print()

# Retrieve a text by ID
text_id = "custom_id"
vlite.add("This is the fifth text.", metadata={"id": text_id, "source": "example3"})
text_metadata = vlite.get(ids=[text_id])

print(f"Text with ID '{text_id}':")
print(f"Metadata: {text_metadata}")

# Update a text in the collection
vlite.update("custom_id", text="This is the updated fifth text.", metadata={"source": "updated"})

# Delete a text from the collection
vlite.delete("custom_id")

# Get texts based on IDs
ids = [0, 1]
texts_by_ids = vlite.get(ids=ids)
print(f"Texts with IDs {ids}:")
for text, meta in texts_by_ids:
    print(f"Text: {text}")
    print(f"Metadata: {meta}")
    print()

# Get texts based on metadata
metadata_filter = {"source": "example2"}
texts_by_metadata = vlite.get(where=metadata_filter)
print(f"Texts with metadata {metadata_filter}:")
for text, meta in texts_by_metadata:
    print(f"Text: {text}")
    print(f"Metadata: {meta}")
    print()

# Set metadata for a text
vlite.set(0, metadata={"updated": True})

# Set text content for a text
vlite.set(1, text="This is the updated second text.")

# Set vector for a text
new_vector = np.random.rand(vlite.model.dimension)
vlite.set(2, vector=new_vector)

# Get the count of texts in the collection
count = vlite.count()
print(f"Total texts in the collection: {count}")

# Get information about the collection
vlite.info()

# Clear the entire collection
vlite.clear()

# Process a PDF and add its contents to the collection
pdf_path = "data/attention.pdf"
pdf_texts = process_pdf(pdf_path)
vlite.add(pdf_texts, need_chunks=False)

# Query the collection 
query = "What is attention?"
similar_texts, scores, metadata = vlite.retrieve(query, top_k=3)
print("Similar texts:")
for text, score, meta in zip(similar_texts, scores, metadata):
    print(f"Text: {text}")
    print(f"Score: {score}")
    print(f"Metadata: {meta}")
    print()

# Dump the collection data
collection_data = vlite.dump()
print("Collection data:")
print(collection_data)

Obtaining file:///Users/sdan/Developer/vlite
  Preparing metadata (setup.py) ... [?25ldone
Collecting surya-ocr
  Downloading surya_ocr-0.3.0-py3-none-any.whl (69 kB)
     |████████████████████████████████| 69 kB 8.4 MB/s             
Collecting pydantic<3.0.0,>=2.5.3
  Downloading pydantic-2.6.4-py3-none-any.whl (394 kB)
     |████████████████████████████████| 394 kB 16.9 MB/s            
[?25hCollecting transformers
  Downloading transformers-4.36.2-py3-none-any.whl (8.2 MB)
     |████████████████████████████████| 8.2 MB 89.8 MB/s            
[?25hCollecting pypdfium2<5.0.0,>=4.25.0
  Downloading pypdfium2-4.28.0-py3-none-macosx_11_0_arm64.whl (2.7 MB)
     |████████████████████████████████| 2.7 MB 10.9 MB/s            
[?25hCollecting pydantic-settings<3.0.0,>=2.1.0
  Downloading pydantic_settings-2.2.1-py3-none-any.whl (13 kB)
Collecting pillow<11.0.0,>=10.2.0
  Downloading pillow-10.3.0-cp310-cp310-macosx_11_0_arm64.whl (3.4 MB)
     |████████████████████████████████| 3.4 MB 2

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


Collection file my_collection.npz not found. Initializing empty attributes.
Adding text to the collection...
Saving collection to my_collection.npz
Collection saved successfully.
Text added successfully.
Adding text to the collection...
Saving collection to my_collection.npz
Collection saved successfully.
Text added successfully.
Adding text to the collection...
Saving collection to my_collection.npz
Collection saved successfully.
Text added successfully.
Retrieving similar texts...
Retrieving top 3 similar texts for query: What is the text about?
Retrieval completed.
Similar texts:
Text: This is the first text.
Score: This is the second text.
Metadata: This is the third text.

Text: 0.8058918714523315
Score: 0.7846782207489014
Metadata: 0.7761037945747375

Text: {'source': 'example1', 'tags': ['text', 'example'], 'id': '4e58dba6-df3e-4f77-81d9-e9798b7362d8'}
Score: {'source': 'example2', 'tags': ['text', 'example'], 'id': 'c54a1edd-4399-446c-b3aa-4e14d91cc7b5'}
Metadata: {'source': 'e

In [1]:
from vlite.main import VLite
from vlite.utils import process_pdf
vlite = VLite("my_collection")
# Process a PDF and add its contents to the collection
pdf_path = "data/attention.pdf"
pdf_texts = process_pdf(pdf_path, use_ocr=True)
vlite.add(pdf_texts, need_chunks=True)
vlite.retrieve("What is attention?", top_k=10)

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(


Collection file my_collection.npz not found. Initializing empty attributes.
Using OCR with languages: ['en']
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|██████████| 2/2 [01:16<00:00, 38.39s/it]
Recognizing Text: 100%|██████████| 1/1 [00:23<00:00, 23.56s/it]


[OCRResult(text_lines=[TextLine(polygon=[[279.0, 130.0], [535.0, 130.0], [535.0, 155.0], [279.0, 155.0]], confidence=0.9611628651618958, text='Attention Is All You Need', bbox=[279.0, 130.0, 535.0, 155.0]), TextLine(polygon=[[175.0, 245.0], [272.0, 245.0], [272.0, 259.0], [175.0, 259.0]], confidence=0.921112060546875, text='Ashish Vaswani*', bbox=[175.0, 245.0, 272.0, 259.0]), TextLine(polygon=[[448.0, 245.0], [529.0, 245.0], [529.0, 259.0], [448.0, 259.0]], confidence=0.8982121348381042, text='Niki Parmar*', bbox=[448.0, 245.0, 529.0, 259.0]), TextLine(polygon=[[563.0, 245.0], [662.0, 245.0], [662.0, 259.0], [563.0, 259.0]], confidence=0.9294289946556091, text='Jakob Uszkoreit*', bbox=[563.0, 245.0, 662.0, 259.0]), TextLine(polygon=[[316.0, 246.0], [405.0, 246.0], [405.0, 260.0], [316.0, 260.0]], confidence=0.895263671875, text='Noam Shazeer*', bbox=[316.0, 246.0, 405.0, 260.0]), TextLine(polygon=[[182.0, 261.0], [261.0, 261.0], [261.0, 275.0], [182.0, 275.0]], confidence=0.9204852581

[('Attention Is All You Need Ashish Vaswani* Niki Parmar* Jakob Uszkoreit* Noam Shazeer* Google Brain Google Brain Google Research Google Research avaswani@google.com noam@google.com nikip@google.com usz@google.com Llion Jones* Aidan N. Gomez* ↑ Łukasz Kaiser* Google Research University of Toronto Google Brain llion@google.com aidan@cs.toronto.edu lukaszkaiser@google.com Illia Polosukhin* † illia.polosukhin@gmail.com Abstract The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our m

In [5]:
pdf_texts

['Attention Is All You Need Ashish Vaswani* Niki Parmar* Jakob Uszkoreit* Noam Shazeer* Google Brain Google Brain Google Research Google Research avaswani@google.com noam@google.com nikip@google.com usz@google.com Llion Jones* Aidan N. Gomez* ↑ Łukasz Kaiser* Google Research University of Toronto Google Brain llion@google.com aidan@cs.toronto.edu lukaszkaiser@google.com Illia Polosukhin* † illia.polosukhin@gmail.com Abstract The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our mo

In [1]:
from vlite.utils import process_pdf
process_pdf("data/attention.pdf", use_ocr=True)

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(


Using OCR with languages: ['en']
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16
Max pages: 18 Start page: 0


Detecting bboxes: 100%|██████████| 2/2 [01:27<00:00, 43.73s/it]
Recognizing Text: 100%|██████████| 1/1 [00:25<00:00, 25.06s/it]


[OCRResult(text_lines=[TextLine(polygon=[[279.0, 130.0], [535.0, 130.0], [535.0, 155.0], [279.0, 155.0]], confidence=0.9611628651618958, text='Attention Is All You Need', bbox=[279.0, 130.0, 535.0, 155.0]), TextLine(polygon=[[175.0, 245.0], [272.0, 245.0], [272.0, 259.0], [175.0, 259.0]], confidence=0.921112060546875, text='Ashish Vaswani*', bbox=[175.0, 245.0, 272.0, 259.0]), TextLine(polygon=[[448.0, 245.0], [529.0, 245.0], [529.0, 259.0], [448.0, 259.0]], confidence=0.8982121348381042, text='Niki Parmar*', bbox=[448.0, 245.0, 529.0, 259.0]), TextLine(polygon=[[563.0, 245.0], [662.0, 245.0], [662.0, 259.0], [563.0, 259.0]], confidence=0.9294289946556091, text='Jakob Uszkoreit*', bbox=[563.0, 245.0, 662.0, 259.0]), TextLine(polygon=[[316.0, 246.0], [405.0, 246.0], [405.0, 260.0], [316.0, 260.0]], confidence=0.895263671875, text='Noam Shazeer*', bbox=[316.0, 246.0, 405.0, 260.0]), TextLine(polygon=[[182.0, 261.0], [261.0, 261.0], [261.0, 275.0], [182.0, 275.0]], confidence=0.9204852581

['Attention Is All You Need Ashish Vaswani* Niki Parmar* Jakob Uszkoreit* Noam Shazeer* Google Brain Google Brain Google Research Google Research avaswani@google.com noam@google.com nikip@google.com usz@google.com Llion Jones* Aidan N. Gomez* ↑ Łukasz Kaiser* Google Research University of Toronto Google Brain llion@google.com aidan@cs.toronto.edu lukaszkaiser@google.com Illia Polosukhin* † illia.polosukhin@gmail.com Abstract The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our mo

In [1]:

from PIL import Image
from surya.ocr import run_ocr
from surya.model.detection import segformer
from surya.model.recognition.model import load_model
from surya.model.recognition.processor import load_processor
from surya.input.load import load_from_folder, load_from_file, load_pdf

import pkg_resources

surya_version = pkg_resources.get_distribution("surya-ocr").version
print(surya_version)

images , _ = load_from_file("data/attention2.pdf")
print("rr len", len(images))
langs = ["en"] * len(images) 
print(f"len of images: {len(images)} and len of langs: {len(langs)}")
det_processor, det_model = segformer.load_processor(), segformer.load_model()
rec_model, rec_processor = load_model(), load_processor()
print("now starting")
predictions = run_ocr(images, langs, det_model, det_processor, rec_model, rec_processor)

  from .autonotebook import tqdm as notebook_tqdm


0.3.0
INDDD [0, 1]
imss,images
len imgs 2
recv imgs 2
rexc len nam 2
rr len 2
len of images: 2 and len of langs: 2
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16
now starting
[batch__detection] Number of images: 2
[batch__detection] Number of splits: 2
[batch__detection] Number of images: 2


Detecting bboxes: 100%|██████████| 1/1 [00:19<00:00, 19.52s/it]

[batch__detection] Number of pred_parts: 2 preds: 0
[batch__detection] Number of split_index: 2
[batch__detection] Number of split_heights: 2
[batch__detection] afte3r Number of preds: 2
[batch_text_detection] Number of images: 2





[batch_text_detection] Number of results: 2
Det predictions: [TextDetectionResult(bboxes=[PolygonBox(polygon=[[279, 130], [535, 130], [535, 155], [279, 155]], confidence=0.9594303965568542, bbox=[279, 130, 535, 155]), PolygonBox(polygon=[[175, 245], [272, 245], [272, 259], [175, 259]], confidence=0.903630256652832, bbox=[175, 245, 272, 259]), PolygonBox(polygon=[[448, 245], [529, 245], [529, 259], [448, 259]], confidence=0.9135720133781433, bbox=[448, 245, 529, 259]), PolygonBox(polygon=[[563, 245], [662, 245], [662, 259], [563, 259]], confidence=0.9150099754333496, bbox=[563, 245, 662, 259]), PolygonBox(polygon=[[316, 246], [405, 246], [405, 260], [316, 260]], confidence=0.895868182182312, bbox=[316, 246, 405, 260]), PolygonBox(polygon=[[182, 261], [261, 261], [261, 275], [182, 275]], confidence=0.879332423210144, bbox=[182, 261, 261, 275]), PolygonBox(polygon=[[320, 261], [399, 261], [399, 275], [320, 275]], confidence=0.8601277470588684, bbox=[320, 261, 399, 275]), PolygonBox(polygo

Recognizing Text:   0%|          | 0/2 [00:00<?, ?it/s]

Batch images: [<PIL.Image.Image image mode=RGB size=257x26 at 0x2F99AF190>, <PIL.Image.Image image mode=RGB size=98x15 at 0x2E64E2500>, <PIL.Image.Image image mode=RGB size=82x15 at 0x2E64E0100>, <PIL.Image.Image image mode=RGB size=100x15 at 0x2E64E2530>, <PIL.Image.Image image mode=RGB size=90x15 at 0x2E64E2560>, <PIL.Image.Image image mode=RGB size=80x15 at 0x2E64E2590>, <PIL.Image.Image image mode=RGB size=80x15 at 0x2E64E25C0>, <PIL.Image.Image image mode=RGB size=98x14 at 0x2E64E25F0>, <PIL.Image.Image image mode=RGB size=97x15 at 0x2E64E2620>, <PIL.Image.Image image mode=RGB size=139x16 at 0x2E64E2650>, <PIL.Image.Image image mode=RGB size=112x16 at 0x2E64E2680>, <PIL.Image.Image image mode=RGB size=119x16 at 0x2E64E26B0>, <PIL.Image.Image image mode=RGB size=103x16 at 0x2E64E26E0>, <PIL.Image.Image image mode=RGB size=114x16 at 0x2E64E2710>, <PIL.Image.Image image mode=RGB size=75x15 at 0x2E64E2740>, <PIL.Image.Image image mode=RGB size=91x14 at 0x2E64E2770>, <PIL.Image.Image i

Recognizing Text:  50%|█████     | 1/2 [00:38<00:38, 38.48s/it]

Batch images: [<PIL.Image.Image image mode=RGB size=302x15 at 0x2E64E30A0>, <PIL.Image.Image image mode=RGB size=534x15 at 0x2E64E30D0>, <PIL.Image.Image image mode=RGB size=534x15 at 0x2E64E3100>, <PIL.Image.Image image mode=RGB size=534x17 at 0x2E64E3130>, <PIL.Image.Image image mode=RGB size=264x16 at 0x2E64E3160>, <PIL.Image.Image image mode=RGB size=533x15 at 0x2E64E3190>, <PIL.Image.Image image mode=RGB size=535x16 at 0x2E64E31C0>, <PIL.Image.Image image mode=RGB size=534x16 at 0x2E64E31F0>, <PIL.Image.Image image mode=RGB size=464x15 at 0x2E64E3220>, <PIL.Image.Image image mode=RGB size=16x16 at 0x2E64E3250>, <PIL.Image.Image image mode=RGB size=90x17 at 0x2E64E3280>, <PIL.Image.Image image mode=RGB size=532x17 at 0x2E64E32B0>, <PIL.Image.Image image mode=RGB size=532x16 at 0x2E64E32E0>, <PIL.Image.Image image mode=RGB size=534x16 at 0x2E64E3310>, <PIL.Image.Image image mode=RGB size=533x15 at 0x2E64E3340>, <PIL.Image.Image image mode=RGB size=533x15 at 0x2E64E3370>, <PIL.Image.

Recognizing Text: 100%|██████████| 2/2 [01:28<00:00, 44.36s/it]

PLines image 0/2
slice_start: 0, slice_end: 57
Lines: [TextLine(polygon=[[279.0, 130.0], [535.0, 130.0], [535.0, 155.0], [279.0, 155.0]], confidence=0.14995373785495758, text='Attention Is', bbox=[279.0, 130.0, 535.0, 155.0]), TextLine(polygon=[[175.0, 245.0], [272.0, 245.0], [272.0, 259.0], [175.0, 259.0]], confidence=0.6880332231521606, text='  ', bbox=[175.0, 245.0, 272.0, 259.0]), TextLine(polygon=[[448.0, 245.0], [529.0, 245.0], [529.0, 259.0], [448.0, 259.0]], confidence=0.15183010697364807, text='Niki  Parmar*', bbox=[448.0, 245.0, 529.0, 259.0]), TextLine(polygon=[[563.0, 245.0], [662.0, 245.0], [662.0, 259.0], [563.0, 259.0]], confidence=0.18944701552391052, text='Jakob Uszkoreit*', bbox=[563.0, 245.0, 662.0, 259.0]), TextLine(polygon=[[316.0, 246.0], [405.0, 246.0], [405.0, 260.0], [316.0, 260.0]], confidence=0.024873193353414536, text='  *', bbox=[316.0, 246.0, 405.0, 260.0]), TextLine(polygon=[[182.0, 261.0], [261.0, 261.0], [261.0, 275.0], [182.0, 275.0]], confidence=0.150




In [6]:
texts = []
for prediction in predictions:
    text = ' '.join([result.text for result in prediction.text_lines])
    texts.append(text)

print(texts)


['Attention Is    Niki  Parmar* Jakob Uszkoreit*   * Google Brain Google Brain        noam�google.com nikip�google.com usz�google.com Lion Jones*           Google Brain llion�google.com aidan�cs.toronto lukaszkaiser�google.com Illia Polosukhin*  † illia.polosukhin�gmail.com Abstract The dominant sequence transduction models are sequence transd coder. The best    mechanism.  We propose a new simple  on on olutions entirely.  Experiments on two machine two be superior in quality    to-German translation task, improving over the extreme to  by over 2 BLEU. On the WMT 2014 Ensembles,     fraction of the training costs of the best models 1 Introduction                 Equal contribution. Listing order is random. Equal contriber en e             tensor2tensor. Llion also experimented with novel model variants tensor. Llion also experimented wi       our research.  Work performed.  Work performed   ', '       and the input for position t . This inherently             constraint of sequential 

In [10]:
import pypdfium2
pypdfium2.PdfDocument("data/attention.pdf")
page_indices = list(range(0, len(pypdfium2.PdfDocument("data/attention.pdf"))))
print(page_indices)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


In [7]:
text = ' '.join([result.text for result in predictions[0].text_lines])
print(text)

Attention Is All You Need Ashish Vaswani* Niki Parmar* Jakob Uszkoreit* Noam Shazeer* Google Brain Google Brain Google Research Google Research avaswani@google.com noam@google.com nikip@google.com usz@google.com Llion Jones* Aidan N. Gomez* ↑ Łukasz Kaiser* Google Research University of Toronto Google Brain llion@google.com aidan@cs.toronto.edu lukaszkaiser@google.com Illia Polosukhin* † illia.polosukhin@gmail.com Abstract The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our mode

In [2]:
from vlite.main import VLite
from vlite.model import EmbeddingModel
vlite = VLite("test_llama")
# load text from data/text-8192tokens.txt
with open("data/text-8192tokens.txt", "r") as f:
    text = f.read()

model = EmbeddingModel()
embds = model.embed(text)
embds_old = model.old_embed(text)

embds = [item["embedding"] for item in embds["data"]]

print(f"Embedding : {embds}")
print(f"Old embedding : {embds_old}")

# more statistics on the embeddings like len 
print(f"Number of embeddings : {len(embds[0])}")
print(f"Number of embeddings old : {len(embds_old[0])}")




llama_model_loader: loaded meta data with 23 key-value pairs and 389 tensors from /Users/sdan/Developer/vlite/vlite/models/mxbai-embed-large-v1-f16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = bert
llama_model_loader: - kv   1:                               general.name str              = mxbai-embed-large-v1
llama_model_loader: - kv   2:                           bert.block_count u32              = 24
llama_model_loader: - kv   3:                        bert.context_length u32              = 512
llama_model_loader: - kv   4:                      bert.embedding_length u32              = 1024
llama_model_loader: - kv   5:                   bert.feed_forward_length u32              = 4096
llama_model_loader: - kv   6:                  bert.attention.head_count u32              = 16
llama_model_loader: - kv   7:   

Embedding : [[0.018481637099219947, -0.06751283090320145, 0.013557901448916032, 0.03122558922667019, -0.007270908957257999, 0.0022115125329453473, -0.010972675534822524, 0.002938762152642013, 0.025710664019259227, 0.008960964123499456, -0.009221052412863326, -0.010578838189463582, 0.008349996712373634, 0.002192666652724842, 0.013889722878230598, -0.012400001518828807, -0.017607746720204577, -0.025122936465400876, -0.05778649636713881, 0.004918041506715364, 0.005789833592691695, 0.013303820514837908, -0.04872032015324099, -0.024767319059057203, -0.014535874553562629, 0.042908333254939285, 0.008850139459753097, 0.028865440392830347, 0.04436600517109268, 0.02139505028561195, 0.023455539799463753, -0.007148875370730914, -0.0035211556436975074, -0.04720171481672186, 0.005912522445129169, -0.018686372093735578, 0.0349997910791843, 0.007013039738968924, 0.007459390743342988, -0.006395593577686473, 0.036542638550401914, -0.03332700086275507, 0.018758889389590038, -0.014557854353412322, -0.0208

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
llama_model_loader: loaded meta data with 23 key-value pairs and 389 tensors from /Users/sdan/Developer/vlite/vlite/models/mxbai-embed-large-v1-f16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = bert
llama_model_loader: - kv   1:                               general.name str              = mxbai-embed-large-v1
llama_model_loader: - kv   2:                           bert.block_count u32              = 24
llama_model_loader: - kv   3:                        bert.context_length u32              = 512
llama_model_loader: - kv   4:                      bert.embedding_length u32              = 1024
llama_model_loader: - kv   5:                   bert.feed_forward_length u32              = 4096
llama_model_loader: - kv   6:         

Adding text to the collection...
Lenght of text: 1
Chopped text into 348 chunks.


AttributeError: 'VLite' object has no attribute 'old_model'

In [3]:
import numpy as np
import vlite
from vlite.main import VLite
from vlite.utils import process_pdf
import os

import importlib
importlib.reload(vlite)

# Create a new VLite instance
vlite = VLite("test_collection")

# Add a single text to the collection
text1 = "This is the first text."
metadata1 = {"source": "example1", "tags": ["text", "example"]}
vlite.add(text1, metadata=metadata1)

llama_model_loader: loaded meta data with 23 key-value pairs and 389 tensors from /Users/sdan/.cache/huggingface/hub/models--mixedbread-ai--mxbai-embed-large-v1/snapshots/456b7cfe38ad0f470a4e2074798eae56e5904bf6/gguf/mxbai-embed-large-v1-f16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = bert
llama_model_loader: - kv   1:                               general.name str              = mxbai-embed-large-v1
llama_model_loader: - kv   2:                           bert.block_count u32              = 24
llama_model_loader: - kv   3:                        bert.context_length u32              = 512
llama_model_loader: - kv   4:                      bert.embedding_length u32              = 1024
llama_model_loader: - kv   5:                   bert.feed_forward_length u32              = 4096
llama_model_loader: - kv   6:    

Downloaded model to /Users/sdan/.cache/huggingface/hub/models--mixedbread-ai--mxbai-embed-large-v1/snapshots/456b7cfe38ad0f470a4e2074798eae56e5904bf6/gguf/mxbai-embed-large-v1-f16.gguf
Adding text to the collection...
Lenght of text: 1
Original text: ['This is the first text.']
Chopped text into this chunk: ['This is the first text.']
Chopped text into 1 chunks.
Saving collection to test_collection.npz
Collection saved successfully.
Text added successfully.


[('a58d2ffc-c43a-42d8-bb76-ebcac40f62f1',
  [[-0.015403256326941726,
    -0.04375677505188196,
    0.03295014519585718,
    0.012211036800445389,
    -0.02269887602256177,
    -0.004221641539613182,
    0.009043566796961586,
    -0.019706791272254946,
    0.04740289814818726,
    0.01357797787966627,
    -0.02658735128704043,
    0.017808808794673003,
    0.012398091601786076,
    0.03353798799614398,
    -0.04431361546752158,
    -0.026347416736018453,
    -0.03478339745934415,
    0.005139328416633254,
    -0.04596489298596065,
    0.010477229069107991,
    -0.026934096631852686,
    0.030946424820852955,
    -0.10189768449274587,
    -0.050309964862138415,
    -0.01460200705839647,
    0.012350764450841534,
    0.0008631959826250893,
    0.024935366305056685,
    0.060936366129955837,
    0.045525235895776464,
    0.017537747647847143,
    0.03834693633790038,
    0.013142613275034938,
    -0.02125291414565971,
    -0.007263016067148465,
    -0.013499259509149224,
    0.030338344602