In [1]:
# Fetch data files from the GitHub repo (uncomment and run once if ./data does not exist yet)
# !curl -sL https://github.com/deepset-ai/haystack-core-integrations/tarball/main -o main.tar
# !mkdir main
# !tar xf main.tar -C main --strip-components 1
# !mv main/integrations/chroma/example/data .

# https://github.com/deepset-ai/haystack-cookbook/blob/main/notebooks/chroma-indexing-and-rag-examples.ipynb

In [2]:
import os
from pathlib import Path

from haystack import Pipeline
from haystack.components.converters import TextFileToDocument
from haystack.components.writers import DocumentWriter

from haystack_integrations.document_stores.chroma import ChromaDocumentStore

# Index every regular file under ./data. Sorting makes the indexing order
# reproducible across runs (directory listing order is arbitrary), and the
# is_file() filter keeps sub-directories such as .ipynb_checkpoints out of
# the converter's sources.
file_paths = sorted(path for path in Path("data").iterdir() if path.is_file())

# Chroma is used in-memory so we use the same instances in the two pipelines below
document_store = ChromaDocumentStore()

# Indexing pipeline: converter -> writer, with the writer backed by `document_store`.
indexing = Pipeline()
indexing.add_component("converter", TextFileToDocument())
indexing.add_component("writer", DocumentWriter(document_store))
indexing.connect("converter", "writer")
# Kept as the cell's last expression so the notebook displays the run summary.
indexing.run({"converter": {"sources": file_paths}})


{'writer': {'documents_written': 36}}

In [3]:
from haystack_integrations.components.retrievers.chroma import ChromaQueryTextRetriever
from haystack.components.generators import HuggingFaceTGIGenerator, OpenAIGenerator
from haystack.components.builders import PromptBuilder

In [4]:
# Prompt template for the generation step. It uses Jinja-style placeholders:
# the for-loop emits the `content` of every entry in `documents`, and the
# `query` placeholder is replaced by the user's question. The model is told to
# reply 'Answer not found' when the context does not contain the answer.
prompt = """
Answer the query based on the provided context.
If the context does not contain the answer, say 'Answer not found'.
Context:
{% for doc in documents %}
  {{ doc.content }}
{% endfor %}
query: {{query}}
Answer:
"""

In [5]:
# Build the prompt component from the template string defined above.
prompt_builder = PromptBuilder(template=prompt)

In [6]:
# llm = HuggingFaceTGIGenerator(model="mistralai/Mistral-7B-Instruct-v0.1")
# llm.warm_up()

In [7]:
import os

# Configure the OpenAI client through environment variables: the API key and
# the base URL of the server to talk to (here an address on localhost).
# NOTE(review): the key looks like a dummy value - confirm no real secret is
# ever committed in this cell.
os.environ.update({
    'OPENAI_API_KEY': 'sk-111111111111111111111111111111111111111111111111',
    'OPENAI_API_BASE': 'http://127.0.0.1:5000/v1',
})
# os.environ['OPENAI_API_BASE']='http://127.0.0.1:8000/v1'

In [8]:
import requests
# Ask the server which model it currently has loaded. The URL is derived from
# OPENAI_API_BASE so this request always targets the same server as the
# generator, even when the base URL is switched in the configuration cell.
model_info_url = f"{os.environ['OPENAI_API_BASE'].rstrip('/')}/internal/model/info"
# A timeout keeps the cell from hanging forever when the server is not running.
resp = requests.get(model_info_url, timeout=10)
# Surface HTTP errors here instead of as a confusing JSON/KeyError below.
resp.raise_for_status()
model = resp.json()['model_name']

print(model)

Mistral-7B-Instruct-v0.2-8.0bpw-h8-exl2-2


In [9]:
# Talk to the OpenAI-compatible endpoint from OPENAI_API_BASE and request the
# model name the server reported above, instead of silently falling back to
# the generator's built-in default model name.
llm = OpenAIGenerator(api_base_url=os.getenv('OPENAI_API_BASE'), model=model)

In [10]:
# Query pipeline: retriever -> prompt_builder -> llm.
querying = Pipeline()

# Register the components; the names are the keys used for wiring below and
# for addressing inputs when the pipeline is run.
for component_name, component in (
    ("retriever", ChromaQueryTextRetriever(document_store)),
    ("prompt_builder", prompt_builder),
    ("llm", llm),
):
    querying.add_component(component_name, component)

# Retrieved documents feed the `documents` input of the prompt builder,
# and the prompt builder's output is passed on to the generator.
querying.connect("retriever.documents", "prompt_builder.documents")
querying.connect("prompt_builder", "llm")



In [11]:
# Question passed to both the retriever and the prompt builder in the runs below.
query = "Should I write documentation for my plugin?"

In [12]:
# Run the query pipeline once: the retriever is asked for 3 documents, the
# prompt builder receives the question, and generation is capped at 2000 tokens.
retriever_inputs = {"query": query, "top_k": 3}
builder_inputs = {"query": query}
llm_inputs = {"generation_kwargs": {"max_tokens": 2000}}

results = querying.run(
    {"retriever": retriever_inputs, "prompt_builder": builder_inputs, "llm": llm_inputs}
)

In [13]:
# Show the first reply returned by the "llm" component.
first_reply = results["llm"]["replies"][0]
print(first_reply)

Yes, it is strongly recommended to write documentation for your plugin. This will make it easier for other users to understand and use your plugin. You can use Vim's built-in help system to document your functions, variables, and commands. The documentation should explain what your plugin does, how to install and use it, and any configuration options that are available. This will help ensure that your plugin is accessible and useful to as many users as possible.


In [14]:
# Same question and retrieval settings again, this time capping generation at 350 tokens.
rerun_inputs = {
    "retriever": dict(query=query, top_k=3),
    "prompt_builder": dict(query=query),
    "llm": dict(generation_kwargs=dict(max_tokens=350)),
}
results = querying.run(rerun_inputs)

In [15]:
# Print the first reply from the most recent run.
llm_output = results["llm"]
print(llm_output["replies"][0])

Yes, writing documentation for your plugin is highly recommended. This will help users understand how to use your plugin, provide examples of how it can be used, and give them a clear understanding of its features and capabilities. Good documentation can also help reduce support requests and make it easier for users to troubleshoot issues on their own. You can include documentation in various formats such as text files, HTML help files, or even integrated help within your plugin.
