In [30]:
from langchain.retrievers import DocArrayRetriever
from docarray import BaseDoc
from docarray.index import HnswDocumentIndex
from docarray.typing import NdArray
import numpy as np
from langchain.embeddings import FakeEmbeddings


In [31]:
# FakeEmbeddings returns random vectors, so without a fixed seed every fresh
# kernel indexes different embeddings and the retrieval result changes.
# Seed NumPy's global RNG so the generated vectors are the same on each
# "Restart & Run All".
# NOTE(review): assumes FakeEmbeddings samples from np.random's global state — confirm.
np.random.seed(42)

# 768-dimensional fake embedding model; the size must match the NdArray[768]
# fields declared on the document schema.
embeddings = FakeEmbeddings(size=768)

class MyDoc(BaseDoc):
    """Schema of the documents stored in the docarray index.

    Each document carries a title, two 768-dimensional embedding vectors,
    and an integer field.
    """

    # Human-readable title; the retriever in this notebook uses it as content_field.
    title: str
    # Vector the retriever in this notebook searches against (search_field).
    title_embedding: NdArray[768]
    # Second embedding field; populated at index time but not used as the search field here.
    other_emb: NdArray[768]
    # Integer payload; appears in the metadata of the retrieved Document.
    some_int: int

# initialize docarray index (in this case hnsw, but will work for any backend)
db = HnswDocumentIndex[MyDoc](work_dir='workdir')

# index data
# The HNSW index is persisted under `work_dir`, so re-running this cell against
# an existing 'workdir' would append another 100 documents each time. Only
# populate the index when it is empty so the cell stays idempotent.
if db.num_docs() == 0:
    db.index(
        [
            MyDoc(
                title=f"My document {i}",
                title_embedding=embeddings.embed_query('zd'),
                other_emb=embeddings.embed_query('all'),
                some_int=i,
            )
            for i in range(100)
        ]
    )

In [32]:
# Wrap the index in a LangChain retriever: queries are embedded with
# `embeddings`, matched against the `title_embedding` field, and the `title`
# field of each hit becomes the returned Document's page content.
retriever = DocArrayRetriever(
    index=db,
    embeddings=embeddings,
    search_field='title_embedding',
    content_field='title',
)


In [33]:
# Run a similarity search for the query string; the result is a list of Documents.
doc = retriever.get_relevant_documents(
    's2',
)

In [34]:
# Display the retrieval result. Despite the singular name, `doc` is a list of
# Document objects (a single-element list in the recorded output).
doc

[Document(page_content='My document 33', metadata={'id': 'a2757924e639bc8b7dd4d4928ee4d9dc', 'some_int': 33})]