In [1]:
from IPython.display import clear_output

%pip install langchain-huggingface sentence-transformers ipywidgets
%pip install -e ..[all]
clear_output(wait=True)

from langchain_huggingface import HuggingFaceEmbeddings

from langchain_memvid.index import IndexManager, IndexConfig
from langchain_memvid.retriever import Retriever, RetrieverConfig
from langchain_memvid.encoder import Encoder, EncoderConfig

In [2]:
# Initialize
config = IndexConfig(index_type="Flat")
embeddings = HuggingFaceEmbeddings()
index_manager = IndexManager(config=config, embeddings=embeddings)

# Add chunks
chunks = ["text chunk 1", "text chunk 2"]
frame_numbers = [1, 2]
chunk_ids = index_manager.add_chunks(chunks, frame_numbers)

# Search
results = index_manager.search("query", top_k=5)
results

[(0,
  1.6266311407089233,
  {'id': 0, 'text': 'text chunk 1', 'frame': 1, 'length': 12}),
 (1,
  1.6356446743011475,
  {'id': 1, 'text': 'text chunk 2', 'frame': 2, 'length': 12})]

In [3]:
enc_cfg = EncoderConfig()
encoder = Encoder(enc_cfg, index_manager)
chunks = ["Important fact 1", "Important fact 2", "Historical event details"]
encoder.add_chunks(chunks)

encoder.build_video(
    output_file="output.mp4",
    index_file="index.json"
)

Generating QR codes: 100%|██████████| 3/3 [00:00<00:00, 56.91it/s]
Writing video frames:   0%|          | 0/3 [00:00<?, ?it/s]


EncodingStats(backend='opencv', codec='mp4v', total_frames=3, video_size_mb=4.1961669921875e-05, fps=15, duration_seconds=0.2, total_chunks=3, video_file='output.mp4', index_file='index.json', index_stats={'total_chunks': 5, 'total_frames': 3, 'index_type': 'Flat', 'embedding_model': 'sentence-transformers/all-mpnet-base-v2', 'dimension': 768, 'avg_chunks_per_frame': np.float64(1.6666666666666667), 'config': {'index_type': 'Flat', 'nlist': 100, 'serialization_format': 'msgpack'}}, encoding_time=0.012622594833374023)

In [4]:
ret_cfg = RetrieverConfig()
retriever = Retriever("output.mp4", "index.json", ret_cfg, index_manager)
results = retriever.search_with_metadata("machine learning algorithms", top_k=3)
results

[{'text': 'text chunk 1',
  'score': 0.3538665534668246,
  'chunk_id': 0,
  'frame': 1,
  'metadata': {'id': 0, 'text': 'text chunk 1', 'frame': 1, 'length': 12}},
 {'text': 'text chunk 2',
  'score': 0.3532919852753452,
  'chunk_id': 1,
  'frame': 2,
  'metadata': {'id': 1, 'text': 'text chunk 2', 'frame': 2, 'length': 12}},
 {'text': 'Important fact 1',
  'score': 0.34883424908162153,
  'chunk_id': 2,
  'frame': 0,
  'metadata': {'id': 2, 'text': 'Important fact 1', 'frame': 0, 'length': 16}}]