In [1]:
import os
from getpass import getpass

# Prompt for the key only when it is not already available (missing or empty),
# so that "Restart & Run All" and non-interactive runs can reuse a key that was
# exported in the environment instead of blocking on input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI Api key: ")

In [2]:
# MessagePack is an efficient binary serialization format. It's not supposed to be human-readable, as you can see.
# The payload below is a serialized pipeline definition: the readable fragments show the top-level keys
# `metadata`, `max_loops_allowed`, `components` and `connections`. It is decoded further down with
# `Pipeline.loads(ancient_instructions, MsgpackMarshaller())`.
# NOTE(review): the prompt template embedded in the payload is garbled (e.g. "Acc  ding", "docu  nts");
# this looks intentional for the exercise — do not "repair" the bytes here, the string lengths are encoded in the payload.
ancient_instructions = b'\x84\xa8metadata\x80\xb1max_loops_allowedd\xaacomponents\x86\xa9converter\x82\xa4type\xd92haystack.components.converters.html.HTMLToDocument\xafinit_parameters\x80\xa7fetcher\x82\xa4type\xd9<haystack.components.fetchers.link_content.LinkContentFetcher\xafinit_parameters\x84\xb0raise_on_failure\xc3\xabuser_agents\x91\xd9#haystack/LinkContentFetcher/0.152.0\xaeretry_attempts\x02\xa7timeout\x03\xa3llm\x82\xa4type\xd92haystack.components.generators.openai.OpenAIGenerator\xafinit_parameters\x85\xaamodel_name\xadgpt-3.5-turbo\xb2streaming_callback\xc0\xacapi_base_url\xb9https://api.openai.com/v1\xb1generation_kwargs\x80\xadsystem_prompt\xc0\xaeprompt_builder\x82\xa4type\xd99haystack.components.builders.prompt_builder.PromptBuilder\xafinit_parameters\x81\xa8template\xd9\x8a Acc  ding to these docu  nts:\n{% for  oc in documents %}  {{ doc.con     }} {% endfor %}\nAnswer the given qu  tion: {{question}} Answer: \xa6ranker\x82\xa4type\xd9Phaystack.components.rankers.transformers_similarity.TransformersSimilarityRanker\xafinit_parameters\x84\xa6device\xa3cpu\xb2model_name_or_path\xd9$cross-encoder/ms-marco-MiniLM-L-6-v2\xa5token\xc0\xa5top_k\x03\xa8splitter\x82\xa4type\xd9Dhaystack.components.preprocessors.document_splitter.DocumentSplitter\xafinit_parameters\x83\xa8split_by\xa4word\xacsplit_length2\xadsplit_overlap\x00\xabconnections\x95\x82\xa6sender\xb3converter.documents\xa8receiver\xb2splitter.documents\x82\xa6sender\xaffetcher.streams\xa8receiver\xb1converter.sources\x82\xa6sender\xb5prompt_builder.prompt\xa8receiver\xaallm.prompt\x82\xa6sender\xb0ranker.documents\xa8receiver\xb8prompt_builder.documents\x82\xa6sender\xb2splitter.documents\xa8receiver\xb0ranker.documents'

In [7]:
from typing import Dict, Any, Union
import msgpack


class MsgpackMarshaller:
    """
    Custom MessagePack marshaller implementing
    the Marshaller protocol in Haystack.

    Converts between plain dictionaries and their MessagePack
    binary encoding using the `msgpack` package.
    """

    def marshal(self, dict_: Dict[str, Any]) -> bytes:
        """
        Serialize a dictionary to MessagePack.

        :param dict_: The dictionary to serialize.
        :returns: The MessagePack-encoded payload. MessagePack is a binary
            format, so the result is `bytes`, not `str`.
        """
        return msgpack.dumps(dict_)

    def unmarshal(self, data_: Union[str, bytes]) -> Dict[str, Any]:
        """
        Deserialize a MessagePack payload back into a dictionary.

        :param data_: The MessagePack-encoded payload, passed straight to
            `msgpack.loads`.
        :returns: A new `dict` built from the decoded top-level object.
        """
        # dict(...) makes a shallow copy and fails loudly if the decoded
        # top-level object cannot be turned into a mapping.
        return dict(msgpack.loads(data_))

In [None]:
from haystack import Pipeline

# Decode the MessagePack payload with the custom marshaller, then print the
# pipeline re-serialized by `dumps()` with its default marshaller so the
# definition can be read and inspected.
marshaller = MsgpackMarshaller()
pipe = Pipeline.loads(ancient_instructions, marshaller)
print(pipe.dumps())

In [9]:
# YAML pipeline definition whose prompt_builder template is garbled: misspelled words
# ("Acc  ding", "docu  nts", "qu  tion"), a loop variable `oc` that does not match the
# `doc` referenced in the loop body, and a truncated attribute `doc.con`.
# NOTE(review): presumably kept as the "before" version for comparison with
# `fixed_pipeline_definition`; it is not loaded anywhere in the visible cells.
broken_pipeline_definition = """
components:
  converter:
    init_parameters: {}
    type: haystack.components.converters.html.HTMLToDocument
  fetcher:
    init_parameters:
      raise_on_failure: true
      retry_attempts: 2
      timeout: 3
      user_agents:
      - haystack/LinkContentFetcher/0.152.0
    type: haystack.components.fetchers.link_content.LinkContentFetcher
  llm:
    init_parameters:
      api_base_url: https://api.openai.com/v1
      generation_kwargs: {}
      model: gpt-3.5-turbo
      streaming_callback: null
      system_prompt: null
    type: haystack.components.generators.openai.OpenAIGenerator
  prompt_builder:
    init_parameters:
      template: ' Acc  ding to these docu  nts:

        {% for  oc in documents %}  {{ doc.con     }} {% endfor %}

        Answer the given qu  tion: {{question}} Answer: '
    type: haystack.components.builders.prompt_builder.PromptBuilder
  ranker:
    init_parameters:
      device: cpu
      model_name_or_path: cross-encoder/ms-marco-MiniLM-L-6-v2
      token: null
      top_k: 3
    type: haystack.components.rankers.transformers_similarity.TransformersSimilarityRanker
  splitter:
    init_parameters:
      split_by: word
      split_length: 50
      split_overlap: 0
    type: haystack.components.preprocessors.document_splitter.DocumentSplitter
connections:
- receiver: splitter.documents
  sender: converter.documents
- receiver: converter.sources
  sender: fetcher.streams
- receiver: llm.prompt
  sender: prompt_builder.prompt
- receiver: prompt_builder.documents
  sender: ranker.documents
- receiver: ranker.documents
  sender: splitter.documents
max_loops_allowed: 100
metadata: {}

"""

In [None]:
# Repaired YAML pipeline definition. The prompt_builder template is restored to
# readable text: every garbled word is fixed (including "question" in the last
# line) and the loop variable `doc` matches the `{{ doc.content }}` reference.
# Data flow, as declared under `connections`:
#   fetcher -> converter -> splitter -> ranker -> prompt_builder -> llm
fixed_pipeline_definition = """
components:
  converter:
    init_parameters: {}
    type: haystack.components.converters.html.HTMLToDocument
  fetcher:
    init_parameters:
      raise_on_failure: true
      retry_attempts: 2
      timeout: 3
      user_agents:
      - haystack/LinkContentFetcher/0.152.0
    type: haystack.components.fetchers.link_content.LinkContentFetcher
  llm:
    init_parameters:
      api_base_url: https://api.openai.com/v1
      generation_kwargs: {}
      model: gpt-3.5-turbo
      streaming_callback: null
      system_prompt: null
    type: haystack.components.generators.openai.OpenAIGenerator
  prompt_builder:
    init_parameters:
      template: ' According to these documents:

        {% for doc in documents %}  {{ doc.content }} {% endfor %}

        Answer the given question: {{question}} Answer: '
    type: haystack.components.builders.prompt_builder.PromptBuilder
  ranker:
    init_parameters:
      device: cpu
      model_name_or_path: cross-encoder/ms-marco-MiniLM-L-6-v2
      token: null
      top_k: 3
    type: haystack.components.rankers.transformers_similarity.TransformersSimilarityRanker
  splitter:
    init_parameters:
      split_by: word
      split_length: 50
      split_overlap: 0
    type: haystack.components.preprocessors.document_splitter.DocumentSplitter
connections:
- receiver: splitter.documents
  sender: converter.documents
- receiver: converter.sources
  sender: fetcher.streams
- receiver: llm.prompt
  sender: prompt_builder.prompt
- receiver: prompt_builder.documents
  sender: ranker.documents
- receiver: ranker.documents
  sender: splitter.documents
max_loops_allowed: 100
metadata: {}

"""

# Build the pipeline from the repaired definition and run it end to end.
# The same question is given to the ranker (as its query) and to the prompt
# builder (to fill the `{{question}}` placeholder of the template).
question = "how do I start a lathe?"
run_inputs = {
    "prompt_builder": {"question": question},
    "ranker": {"query": question},
    "fetcher": {"urls": ["https://en.wikipedia.org/wiki/Lathe"]},
}

working_pipeline = Pipeline.loads(fixed_pipeline_definition)
result = working_pipeline.run(run_inputs)

# Show only the first reply produced by the `llm` component.
first_reply = result["llm"]["replies"][0]
print(first_reply)