In [6]:
# Environment setup: install jupyter and pytorch with conda.
# The captured output below notes that a kernel restart may be needed before
# newly installed packages become importable.
%conda install jupyter pytorch

Channels:
 - conda-forge
 - defaults
Platform: osx-arm64
Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.


Note: you may need to restart the kernel to use updated packages.


In [10]:
# Environment setup: pip-install the packages for the RAG example in the next
# cell (`haystack` and `ollama_haystack` are imported there; sentence-transformers
# presumably backs the SentenceTransformers* embedders).
# NOTE(review): no versions are pinned, so a fresh install may resolve to releases
# whose API differs from the imports used below — consider pinning.
# NOTE(review): txtai is not imported by any code visible in this notebook —
# confirm it is still needed.
%pip install haystack-ai ollama-haystack sentence-transformers 'txtai[pipeline-data]'

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting txtai[pipeline-data]
  Downloading txtai-6.3.0-py3-none-any.whl.metadata (24 kB)
Collecting faiss-cpu>=1.7.1.post2 (from txtai[pipeline-data])
  Using cached faiss_cpu-1.7.4-cp311-cp311-macosx_11_0_arm64.whl (2.7 MB)
Collecting tika>=1.24 (from txtai[pipeline-data])
  Using cached tika-2.6.0-py3-none-any.whl
Downloading txtai-6.3.0-py3-none-any.whl (205 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m205.2/205.2 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu, tika, txtai
Successfully installed faiss-cpu-1.7.4 tika-2.6.0 txtai-6.3.0
Note: you may need to restart the kernel to use updated packages.


In [8]:
from haystack import Pipeline, Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from ollama_haystack import OllamaGenerator
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter

# Prompt template handed to PromptBuilder. It loops over `documents`, emitting
# each document's `content`, then appends the `question` and leaves "Answer:"
# open for the model to complete. The template is whitespace-sensitive text:
# edit it with care, since every character ends up in the prompt.
prompt_template = """
Given these documents, answer the question.
Documents:
{% for doc in documents %}
    {{ doc.content }}
{% endfor %}
Question: {{question}}
Answer:
"""

# Single source of truth for the embedding model. Documents (at indexing time)
# and queries (at retrieval time) must be embedded by the same model, otherwise
# their vectors are not comparable; naming it once keeps both embedders in sync.
EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"

# ---- Components -------------------------------------------------------------
# In-memory store configured to compare embeddings with cosine similarity.
document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")

document_embedder = SentenceTransformersDocumentEmbedder(model_name_or_path=EMBEDDING_MODEL)
# NOTE(review): explicit warm-up before the embedder joins a pipeline — presumably
# this loads the model up front; confirm whether Pipeline.run() already covers it
# before removing.
document_embedder.warm_up()

retriever = InMemoryEmbeddingRetriever(document_store=document_store)
prompt_builder = PromptBuilder(template=prompt_template)
llm = OllamaGenerator(
    # NOTE(review): "mixtral-loc" looks like a locally created model name —
    # confirm it exists on the Ollama server this notebook talks to.
    model="mixtral-loc",
    url="http://localhost:11434/api/generate",
    # Presumably forwarded to Ollama as generation options; check Ollama's
    # parameter reference for the exact meaning of each key.
    generation_kwargs={
        "num_predict": 1024,
        "temperature": 0.8,
        "seed": 42,
        "num_gpu": 1,
        "num_ctx": 32768,
    },
)
answer_builder = AnswerBuilder()

# ---- Indexing: embed the sample documents and write them to the store -------
documents = [
    Document(content="My name is Jean and I live in Paris."),
    Document(content="My name is Mark and I live in Berlin."),
    Document(content="My name is Giorgio and I live in Rome."),
]

document_writer = DocumentWriter(document_store=document_store)
indexing_pipeline = Pipeline()
indexing_pipeline.add_component("embedder", document_embedder)
indexing_pipeline.add_component("writer", document_writer)
indexing_pipeline.connect("embedder", "writer")
indexing_pipeline.run({"embedder": {"documents": documents}})

# ---- RAG pipeline: embed query -> retrieve -> build prompt -> generate -> answer
rag_pipeline = Pipeline()
rag_pipeline.add_component(
    "text_embedder",
    SentenceTransformersTextEmbedder(model_name_or_path=EMBEDDING_MODEL),
)
rag_pipeline.add_component("retriever", retriever)
rag_pipeline.add_component("prompt_builder", prompt_builder)
rag_pipeline.add_component("llm", llm)
rag_pipeline.add_component("answer_builder", answer_builder)
rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")
rag_pipeline.connect("llm.replies", "answer_builder.replies")

# Save a diagram of the pipeline graph next to the notebook.
rag_pipeline.draw("rag_pipeline.png")

question = "Where does Mark live?"

# The same question string is fed to three components: the text embedder (to
# retrieve by embedding), the prompt builder (to fill the template) and the
# answer builder (as the query attached to the answers).
pipeline_inputs = {
    "text_embedder": {"text": question},
    "prompt_builder": {"question": question},
    "answer_builder": {"query": question},
}
results = rag_pipeline.run(pipeline_inputs)

# Print the text of every answer produced by the answer builder.
answers = results["answer_builder"]["answers"]
for answer in answers:
    print(answer.data)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

 Mark lives in Berlin. This information is given in the first document.
