# Milvus Hybrid Search

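This notebook shows how to use `MilvusHybridSearchRetriever` to index texts with several dense and sparse embedding functions and then retrieve documents with a hybrid search.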

## Installation

First, install the `pymilvus` Python package (version 2.4.0 or later).

In [None]:
%pip install --upgrade --quiet pymilvus>=2.4.0

## Examples

In [1]:
from typing import Dict, List
import random

from langchain_core.embeddings import Embeddings
from langchain_community.retrievers.milvus_hybrid_search import MilvusHybridSearchRetriever, SparseEmbeddings



class FakeDenseEmbeddings(Embeddings):
    """Stand-in dense embedding model that returns random 3-dimensional vectors."""

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one random vector of three floats for each entry in ``texts``."""
        vectors: List[List[float]] = []
        for _ in texts:
            # The text content is ignored; every vector is freshly randomized.
            vectors.append([random.random() for _ in range(3)])
        return vectors

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query by delegating to ``embed_documents``."""
        batch = self.embed_documents([text])
        return batch[0]

class FakeSparseEmbeddings(SparseEmbeddings):
    """Stand-in sparse embedding model that returns small random sparse vectors."""

    def embed_documents(self, texts: List[str]) -> List[Dict[int, float]]:
        """Return one random sparse vector (index -> weight) for each entry in ``texts``.

        Each vector holds between one and four entries whose indices lie in
        ``[0, 100]``; the text content itself is ignored.
        """
        max_index = 100
        sparse_vectors = []
        for _ in texts:
            vector_dict = {}
            for _ in range(random.randint(0, 4)):
                vector_dict[random.randint(0, max_index)] = random.random()
            # HACK: never emit an empty sparse vector; substitute a single
            # near-zero entry instead. The original author suspected that
            # empty sparse vectors trigger a Milvus bug (unconfirmed).
            if not vector_dict:
                vector_dict = {0: 0.000001}
            sparse_vectors.append(vector_dict)
        return sparse_vectors

    def embed_query(self, text: str) -> Dict[int, float]:
        """Embed a single query by delegating to ``embed_documents``.

        Returns a sparse vector (index -> weight), not a dense list of floats.
        """
        return self.embed_documents([text])[0]

In [2]:
# Build a hybrid-search retriever backed by two dense and two sparse embedding
# functions; each dictionary key names one embedding function.
# NOTE(review): drop_old=True presumably discards any pre-existing collection
# before indexing -- confirm against the retriever's documentation.
retriever = MilvusHybridSearchRetriever(
    embedding_functions={
        "dense_1": FakeDenseEmbeddings(),
        "dense_2": FakeDenseEmbeddings(),
    },
    sparse_embedding_functions={
        "sparse_1": FakeSparseEmbeddings(),
        "sparse_2": FakeSparseEmbeddings()
    },
    drop_old=True
)

In [3]:
# Index five single-letter texts, supplying an explicit id for each one.
# The cell output below shows the call returning those same ids.
retriever.add_texts(
    ["a", "b", "c", "d", "e"],
    ids=["id_a", "id_b", "id_c", "id_d", "id_e"]
)

['id_a', 'id_b', 'id_c', 'id_d', 'id_e']

In [4]:
# Query for "d" and ask for three results. The fake embeddings above are random,
# so which documents come back (and in what order) is arbitrary.
docs = retriever.get_relevant_documents(query="d", k=3)

In [5]:
# Display the retrieved documents as the cell's output.
docs

[Document(page_content='d'),
 Document(page_content='b'),
 Document(page_content='a')]