In [1]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "2"

Create SDK object as usual

In [2]:
from aana.sdk import AanaSDK


aana_app = AanaSDK()

  from .autonotebook import tqdm as notebook_tqdm
2024-04-26 15:44:02,509	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.

INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.


Deploy Haystack components for embeddings

In [3]:
from aana.deployments.haystack_component_deployment import (
    HaystackComponentDeployment,
    HaystackComponentDeploymentConfig,
)

In [4]:
text_embedder_deployment = HaystackComponentDeployment.options(
    num_replicas=1,
    max_concurrent_queries=1000,
    ray_actor_options={"num_gpus": 0.25},
    user_config=HaystackComponentDeploymentConfig(
        component="haystack.components.embedders.SentenceTransformersTextEmbedder",
        params={"model": "sentence-transformers/all-mpnet-base-v2"},
    ).model_dump(),
)
aana_app.register_deployment("text_embedder_deployment", text_embedder_deployment)

In [5]:
document_embedder_deployment = HaystackComponentDeployment.options(
    num_replicas=1,
    max_concurrent_queries=1000,
    ray_actor_options={"num_gpus": 0.25},
    user_config=HaystackComponentDeploymentConfig(
        component="haystack.components.embedders.SentenceTransformersDocumentEmbedder",
        params={"model": "sentence-transformers/all-mpnet-base-v2"},
    ).model_dump(),
)
aana_app.register_deployment(
    "document_embedder_deployment", document_embedder_deployment
)

Now you can replace original Haystack components with Remote components that run on the cluster and can be reused by different endpoints and workers.

In [7]:
from haystack import Document, Pipeline
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore

from aana.deployments.haystack_component_deployment import RemoteHaystackComponent

document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")

documents = [
    Document(content="My name is Wolfgang and I live in Berlin"),
    Document(content="I saw a black horse running"),
    Document(content="Germany has many big cities"),
]

document_embedder = RemoteHaystackComponent(
    "document_embedder_deployment"
)  # instead of SentenceTransformersDocumentEmbedder()
document_embedder.warm_up()
documents_with_embeddings = document_embedder.run(documents=documents)["documents"]
document_store.write_documents(documents_with_embeddings)

text_embedder = RemoteHaystackComponent(
    "text_embedder_deployment"
)  # SentenceTransformersTextEmbedder()
text_embedder.warm_up()

query_pipeline = Pipeline()
query_pipeline.add_component("text_embedder", text_embedder)
query_pipeline.add_component(
    "retriever", InMemoryEmbeddingRetriever(document_store=document_store)
)
query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")

query = "Who lives in Berlin?"

result = query_pipeline.run({"text_embedder": {"text": query}})

print(result["retriever"]["documents"][0])

# Document(id=..., mimetype: 'text/plain',
#  text: 'My name is Wolfgang and I live in Berlin')

Document(id=62fad790ad2af927af9432c87330ed2ea5e31332cdec8e9d6235a5105ab0aaf5, content: 'My name is Wolfgang and I live in Berlin', score: 0.5515621624192681)


And we can package it as endpoint and deploy.

In [13]:
from typing import TypedDict

from haystack import Document, Pipeline
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore

from aana.api.api_generation import Endpoint


class HaystackTestEndpointOutput(TypedDict):
    response: str


class HaystackTestEndpoint(Endpoint):
    async def initialize(self):
        document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")

        documents = [
            Document(content="My name is Wolfgang and I live in Berlin"),
            Document(content="I saw a black horse running"),
            Document(content="Germany has many big cities"),
        ]

        document_embedder = RemoteHaystackComponent("document_embedder_deployment")
        document_embedder.warm_up()
        documents_with_embeddings = document_embedder.run(documents=documents)[
            "documents"
        ]
        document_store.write_documents(documents_with_embeddings)

        text_embedder = RemoteHaystackComponent("text_embedder_deployment")
        text_embedder.warm_up()

        self.query_pipeline = Pipeline()
        self.query_pipeline.add_component("text_embedder", text_embedder)
        self.query_pipeline.add_component(
            "retriever", InMemoryEmbeddingRetriever(document_store=document_store)
        )
        self.query_pipeline.connect(
            "text_embedder.embedding", "retriever.query_embedding"
        )

    async def run(self, query: str) -> HaystackTestEndpointOutput:
        result = self.query_pipeline.run({"text_embedder": {"text": query}})
        return result["retriever"]["documents"][0].content

In [14]:
aana_app.register_endpoint(
    name="haystack_test_endpoint",
    summary="A test endpoint for Haystack",
    path="/query",
    endpoint_cls=HaystackTestEndpoint,
)

In [15]:
aana_app.deploy(blocking=False)

Deployed successfully.


Let's try a few requests.

In [16]:
import requests, json

data = {"query": "Who lives in Berlin?"}
response = requests.post(
    "http://127.0.0.1:8000/query",
    data={"body": json.dumps(data)},
)
print(response.json())

My name is Wolfgang and I live in Berlin


In [17]:
import requests, json

data = {"query": "What is the interesting fact about Germany?"}
response = requests.post(
    "http://127.0.0.1:8000/query",
    data={"body": json.dumps(data)},
)
print(response.json())

Germany has many big cities
