# Milvus Document Store Haystack example
Based on the integration page: https://haystack.deepset.ai/integrations/milvus-document-store

In [1]:
# Create the indexing Pipeline and index some documents
import glob
import os

from haystack import Pipeline
from haystack.components.converters import MarkdownToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter

from milvus_haystack import MilvusDocumentStore
from milvus_haystack.milvus_embedding_retriever import MilvusEmbeddingRetriever

ModuleNotFoundError: No module named 'milvus_haystack'

In [2]:
# Locate the Markdown file(s) to index. The pattern defaults to the location
# this notebook was written against, and can be overridden through the
# MILVUS_DOCS_GLOB environment variable so the notebook is not tied to one
# machine's directory layout.
markdown_glob = os.environ.get("MILVUS_DOCS_GLOB", "/opt/data/md/milvus-document-store.md")

# glob.glob() returns matches in arbitrary order; sort them so the list is stable.
file_paths = sorted(glob.glob(markdown_glob))

# Make an empty match visible here rather than as a silently empty index later.
if not file_paths:
    print(f"No files matched {markdown_glob!r}; nothing will be indexed.")

file_paths

['/opt/data/md/milvus-document-store.md']

In [3]:
# Connection settings for the Milvus server. Host and port default to the
# values this notebook was originally run with, and can be overridden with the
# MILVUS_HOST / MILVUS_PORT environment variables so the notebook also runs
# against other deployments.
milvus_connection_args = {
    "host": os.environ.get("MILVUS_HOST", "dell"),
    "port": os.environ.get("MILVUS_PORT", "19530"),
    "user": "",
    "password": "",
    "secure": False,
}

# NOTE(review): drop_old=True presumably discards any previously stored data so
# every run starts from an empty store -- confirm against the milvus-haystack docs.
document_store = MilvusDocumentStore(
    connection_args=milvus_connection_args,
    drop_old=True,
)

# Indexing pipeline: Markdown files -> Documents -> two-sentence chunks
# -> embeddings -> written to the Milvus document store.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component("converter", MarkdownToDocument())
indexing_pipeline.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
indexing_pipeline.add_component("embedder", SentenceTransformersDocumentEmbedder())
indexing_pipeline.add_component("writer", DocumentWriter(document_store))

# Wire the stages together in processing order.
indexing_pipeline.connect("converter", "splitter")
indexing_pipeline.connect("splitter", "embedder")
indexing_pipeline.connect("embedder", "writer")

# The converter is the only stage that needs external input: the file list.
indexing_pipeline.run({"converter": {"sources": file_paths}})

print("Number of documents:", document_store.count_documents())

Converting markdown files to Documents: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 565.80it/s]


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Number of documents: 45


In [4]:
# ------------------------------------------------------------------------------------
# Retrieval pipeline: embed the question, then fetch the top 3 documents from the store
question = "How do I install Milvus?"

# Build the two components up front so the pipeline wiring below reads cleanly.
query_embedder = SentenceTransformersTextEmbedder()
milvus_retriever = MilvusEmbeddingRetriever(document_store=document_store, top_k=3)

retrieval_pipeline = Pipeline()
retrieval_pipeline.add_component("embedder", query_embedder)
retrieval_pipeline.add_component("retriever", milvus_retriever)
retrieval_pipeline.connect("embedder", "retriever")

retrieval_results = retrieval_pipeline.run({"embedder": {"text": question}})

# Print each retrieved document's content followed by a 50-dash separator line.
separator = "-" * 50
for doc in retrieval_results["retriever"]["documents"]:
    print(doc.content, separator, sep="\n")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]



🚀 See an example application that uses the MilvusDocumentStore to do Milvus documentation QA here.

Installation

pip install milvus-haystack==0.
--------------------------------------------------

Milvus is a graduated-stage project of the LF AI & Data Foundation.

Use Milvus as storage for Haystack pipelines as MilvusDocumentStore.
--------------------------------------------------
0

PyPI - Version
PyPI - Python Version

Installation

pip install -U milvus-haystack


Usage

First, to start up a Milvus service, follow the 'Start Milvus' instructions in the documentation.

Then, here are the ways to build index, retrieval, and build rag pipeline respectively.
--------------------------------------------------


In [5]:
# ------------------------------------------------------------------------------------
# Create the RAG pipeline and try a query
from haystack.utils import Secret
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.builders import PromptBuilder
# from haystack.components.generators import OpenAIGenerator
from haystack.components.generators import HuggingFaceLocalGenerator
# Local Hugging Face generator used by the RAG pipeline. Sampling is switched on
# (do_sample=True, temperature 0.8) and replies are capped at 100 new tokens.
generator = HuggingFaceLocalGenerator(
    model="google/flan-t5-large",
    task="text2text-generation",
    generation_kwargs={"max_new_tokens": 100, "temperature": 0.8, "do_sample": True},
)

# Prompt template with a `query` placeholder and a loop over `documents` that
# emits each document's `content`. It tells the model to answer from the provided
# context and to reply 'I don't know' when the context does not contain an answer.
prompt_template = """Answer the following query based on the provided context. If the context does
                     not include an answer, reply with 'I don't know'.\n
                     Query: {{query}}
                     Documents:
                     {% for doc in documents %}
                        {{ doc.content }}
                     {% endfor %}
                     Answer: 
                  """

In [6]:
# RAG pipeline: question embedding -> document retrieval -> prompt building -> generation.
# Fresh embedder/retriever instances are created for this pipeline.
rag_text_embedder = SentenceTransformersTextEmbedder()
rag_retriever = MilvusEmbeddingRetriever(document_store=document_store, top_k=3)
rag_prompt_builder = PromptBuilder(template=prompt_template)

rag_pipeline = Pipeline()
rag_pipeline.add_component("text_embedder", rag_text_embedder)
rag_pipeline.add_component("retriever", rag_retriever)
rag_pipeline.add_component("prompt_builder", rag_prompt_builder)
rag_pipeline.add_component("generator", generator)

# Explicit socket names for the embedding and document hand-offs.
rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "generator")

# The question is fed both to the embedder (for retrieval) and to the prompt itself.
rag_inputs = {
    "text_embedder": {"text": question},
    "prompt_builder": {"query": question},
}
results = rag_pipeline.run(rag_inputs)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
# Show only the first reply returned by the generator component.
generator_replies = results["generator"]["replies"]
print('RAG answer:', generator_replies[0])

RAG answer: Install Milvus


In [8]:
# Show the generator component's complete output dict, not just the first reply.
generator_output = results["generator"]
print('RAG answer:', generator_output)

RAG answer: {'replies': ['Install Milvus']}
