# Haystack Basic

## Components

In [None]:
from haystack import Document
from haystack_integrations.components.embedders.ollama import OllamaDocumentEmbedder

embedder = OllamaDocumentEmbedder(model="llama3.1:8b")

In [None]:
documents = [
    Document(
        content="Haystack is an open source AI framework to build full AI applications in Python"
    ),
    Document(content="You can build AI Pipelines by combining Components"),
]

result = embedder.run(documents=documents)

In [None]:
# result = embedder.run(documents)
print(result["documents"][0].embedding)

## Pipelines

### Initialize a Document Store

In [None]:
# from haystack import Pipeline
# from haystack.components.converters import PyPDFToDocument, TextFileToDocument
# from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
# from haystack.components.writers import DocumentWriter
# from haystack.document_stores.in_memory import InMemoryDocumentStore
# from haystack.document_stores.types import DuplicatePolicy
# from haystack_integrations.components.embedders.ollama import OllamaDocumentEmbedder

### Writing documents with embeddings into a document store

In [None]:
from haystack import Pipeline
from haystack.components.converters import TextFileToDocument
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.document_stores.types import DuplicatePolicy
from haystack_integrations.components.embedders.ollama import OllamaDocumentEmbedder

In [None]:
document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")

embedder = OllamaDocumentEmbedder(model="llama3.1:8b")

cleaner = DocumentCleaner()
file_converter = TextFileToDocument()
splitter = DocumentSplitter()
writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE)

indexing_pipeline = Pipeline()

In [None]:
indexing_pipeline.add_component("converter", file_converter)
indexing_pipeline.add_component("splitter", splitter)
indexing_pipeline.add_component("cleaner", cleaner)
indexing_pipeline.add_component("embedder", embedder)
indexing_pipeline.add_component("writer", writer)

#### Connecting Components

In [None]:
indexing_pipeline.connect("converter", "cleaner")
indexing_pipeline.connect("cleaner", "splitter")
# indexing_pipeline.connect("converter", "splitter")
indexing_pipeline.connect("splitter", "embedder")
indexing_pipeline.connect("embedder", "writer")

In [None]:
indexing_pipeline.show()

#### Running Pipelines


In [None]:
indexing_pipeline.run({"converter": {"sources": ["data/davinci.txt"]}})
document_store.filter_documents()[0].content

### Creating a document search pipeline

In [None]:
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack_integrations.components.embedders.ollama import OllamaTextEmbedder

query_embedder = OllamaTextEmbedder(model="llama3.1:8b")
retriever = InMemoryEmbeddingRetriever(document_store=document_store)

document_search = Pipeline()

document_search.add_component("query_embedder", query_embedder)
document_search.add_component("retriever", retriever)

document_search.connect("query_embedder.embedding", "retriever.query_embedding")

In [None]:
document_search.show()

In [None]:
# from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever

# from haystack_integrations.components.embedders.ollama import OllamaTextEmbedder
# query_embedder = OllamaTextEmbedder(model="llama3.1:8b")
# retriever = InMemoryEmbeddingRetriever(document_store=document_store)

# document_search = Pipeline()

# document_search.add_component("query_embedder", query_embedder)
# document_search.add_component("retriever", retriever)

# document_search.connect("query_embedder.embedding", "retriever.query_embedding")


In [None]:
# Run a search query
query = "What is the content of the Davinci document?"
result = document_search.run({"query_embedder": {"text": query}})
print(result)

In [None]:
question = "How old was Davinci when he died?"

results = document_search.run({"query_embedder": {"text": question}})

for i, document in enumerate(results["retriever"]["documents"]):
    print("\n--------------\n")
    print(f"DOCUMENT {i}")
    print(document.content)

In [None]:
question = "How old was Davinci when he died?"

results = document_search.run(
    {"query_embedder": {"text": question}, "retriever": {"top_k": 3}}
)

for i, document in enumerate(results["retriever"]["documents"]):
    print("\n--------------\n")
    print(f"DOCUMENT {i}")
    print(document.content)

In [None]:
question = "Where was Davinci born?"

results = document_search.run(
    {"query_embedder": {"text": question}, "retriever": {"top_k": 3}}
)

for i, document in enumerate(results["retriever"]["documents"]):
    print("\n--------------\n")
    print(f"DOCUMENT {i}")
    print(document.content)

In [None]:
question = "When did Davinci live in Rome?"

results = document_search.run(
    {"query_embedder": {"text": question}, "retriever": {"top_k": 3}}
)

for i, document in enumerate(results["retriever"]["documents"]):
    print("\n--------------\n")
    print(f"DOCUMENT {i}")
    print(document.content)

# Haystack Build Customized RAG

In [None]:
# from haystack import Pipeline
# from haystack.utils.auth import Secret
# from haystack.components.builders import PromptBuilder
# from haystack.components.converters import HTMLToDocument
# from haystack.components.fetchers import LinkContentFetcher
# from haystack.components.generators import OpenAIGenerator
# from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
# from haystack.components.writers import DocumentWriter
# from haystack.document_stores.in_memory import InMemoryDocumentStore

# from haystack_integrations.components.embedders.cohere.document_embedder import CohereDocumentEmbedder
# from haystack_integrations.components.embedders.cohere.text_embedder import CohereTextEmbedder

In [None]:
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.converters import HTMLToDocument
from haystack.components.fetchers import LinkContentFetcher
from haystack.components.generators import OpenAIGenerator
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.writers import DocumentWriter
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.utils.auth import Secret
from haystack_integrations.components.embedders.cohere.document_embedder import (
    CohereDocumentEmbedder,
)
from haystack_integrations.components.embedders.cohere.text_embedder import (
    CohereTextEmbedder,
)
# from haystack_integrations.components.embedders.cohere import CohereDocumentEmbedder, CohereTextEmbedder

In [None]:
document_store = InMemoryDocumentStore()

fetcher = LinkContentFetcher()
converter = HTMLToDocument()
embedder = CohereDocumentEmbedder(
    model="embed-english-v3.0", api_base_url=os.getenv("CO_API_URL")
)
writer = DocumentWriter(document_store=document_store)

indexing = Pipeline()
indexing.add_component("fetcher", fetcher)
indexing.add_component("converter", converter)
indexing.add_component("embedder", embedder)
indexing.add_component("writer", writer)

indexing.connect("fetcher.streams", "converter.sources")
indexing.connect("converter", "embedder")
indexing.connect("embedder", "writer")

In [None]:
import requests
from langchain.chains import LLMChain
from langchain.llms import OpenAI  # Assuming you're using OpenAI
from langchain.text_loaders import TextLoader


# Function to fetch Hacker News articles
def fetch_hackernews_articles(top_k):
    trending_list = requests.get(
        url="https://hacker-news.firebaseio.com/v0/topstories.json?print=pretty"
    ).json()[0:top_k]
    articles = []
    for id in trending_list:
        post = requests.get(
            url=f"https://hacker-news.firebaseio.com/v0/item/{id}.json?print=pretty"
        ).json()
        if "url" in post:
            try:
                articles.append(post["text"])
            except:
                print(f"Can't download {post}, skipped")
        elif "text" in post:
            articles.append(post["text"])
        else:
            print(f"Can't download {post}, skipped")
    return articles


# Define the prompt template
prompt_template = """
You will be provided a few of the top posts in HackerNews.
For each post, provide a brief summary followed by the URL the full post can be found at.

Posts:
{% for article in articles %}
  {{ article }}
  URL: {{ article.meta.get('url', '') }}  # Handle posts without URL
{% endfor %}
"""

# Create the LangChain pipeline
llm = OpenAI()  # Replace with your preferred LLM
articles_loader = TextLoader(texts=fetch_hackernews_articles(top_k=3))
chain = LLMChain(llm=llm, prompt=prompt_template, text_loader=articles_loader)

# Run the pipeline
summaries = chain.run()
print(summaries)

In [None]:
from typing import List

import requests
from bs4 import BeautifulSoup
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

from langchain_ollama import ChatOllama
from langchain.schema import StrOutputParser


class HackernewsNewestFetcher:
    def __init__(self):
        self.base_url = "https://hacker-news.firebaseio.com/v0"

    def fetch_articles(self, top_k: int) -> List[dict]:
        response = requests.get(f"{self.base_url}/topstories.json?print=pretty")
        trending_list = response.json()
        articles = []

        for id in trending_list[:top_k]:
            post = requests.get(f"{self.base_url}/item/{id}.json?print=pretty").json()
            if "url" in post:
                try:
                    content = self.fetch_content(post["url"])
                    articles.append(
                        {"content": content, "title": post["title"], "url": post["url"]}
                    )

                except requests.RequestException as e:
                    print(f"Can't download {post}, skipped. Error: {e}")
                # except:
                # 	print(f"Can't download {post}, skipped")
            elif "text" in post:
                articles.append(
                    {
                        "content": post["text"],
                        "title": post["title"],
                        "url": f"https://news.ycombinator.com/item?id={id}",
                    }
                )

        return articles

    def fetch_content(self, url: str) -> str:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, "html.parser")
        return soup.get_text()


def summarize_articles(articles: List[dict]) -> str:
    prompt_template = """
	You will be provided a few of the top posts in HackerNews, followed by their URL.
	For each post, provide a brief summary followed by the URL the full post can be found at.

	Posts:
	{article_summaries}
	"""

    # Prepare the article summaries
    article_summaries = "\n\n".join(
        [
            f"Content: {article['content']}\nURL: {article['url']}"
            for article in articles
        ]
    )

    prompt = PromptTemplate(
        input_variables=["article_summaries"], template=prompt_template
    )

    llm = ChatOllama(
        model="llama3.1:8b",
        temperature=0,
        # other params...
    )

    output_parser = StrOutputParser()

    chain = prompt | llm | output_parser

    output = chain.invoke(
        {
            "article_summaries": article_summaries,
        }
    )
    return output


# Main execution
fetcher = HackernewsNewestFetcher()
articles = fetcher.fetch_articles(top_k=2)

summaries = summarize_articles(articles)

print(summaries)

In [None]:
# Main execution
fetcher = HackernewsNewestFetcher()
articles = fetcher.fetch_articles(top_k=2)

In [None]:
articles

In [None]:
summaries = summarize_articles(articles)

print(summaries)

In [None]:
article_summaries = "\n\n".join(
    [f"Content: {article['content']}\nURL: {article['url']}" for article in articles]
)

In [None]:
article_summaries

In [None]:
prompt_template = """
You will be provided a few of the top posts in HackerNews, followed by their URL.
For each post, provide a brief summary followed by the URL the full post can be found at.

Posts:
{article_summaries}
"""

In [None]:
prompt = PromptTemplate(input_variables=["article_summaries"], template=prompt_template)

In [None]:
from langchain_ollama import ChatOllama

llm = ChatOllama(
    model="llama3.1:8b",
    temperature=0,
    # other params...
)

In [None]:
chain = LLMChain(llm=llm, prompt=prompt)
chain.run(article_summaries=article_summaries)

In [None]:
from langchain.schema import StrOutputParser

output_parser = StrOutputParser()

chain = prompt | llm | output_parser

output = chain.invoke(
    {
        "article_summaries": article_summaries,
    }
)

In [None]:
output

In [None]:
from typing import List
import requests

from bs4 import BeautifulSoup
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_ollama import ChatOllama
from langchain.schema import StrOutputParser


class HackernewsNewestFetcher:
    def __init__(self):
        self.base_url = "https://hacker-news.firebaseio.com/v0"

    def fetch_articles(self, top_k: int) -> List[dict]:
        response = requests.get(f"{self.base_url}/topstories.json?print=pretty")
        trending_list = response.json()
        articles = []

        for id in trending_list[:top_k]:
            post = requests.get(f"{self.base_url}/item/{id}.json?print=pretty").json()
            if "url" in post:
                try:
                    content = self.fetch_content(post["url"])
                    articles.append(
                        {"content": content, "title": post["title"], "url": post["url"]}
                    )

                except requests.RequestException as e:
                    print(f"Can't download {post}, skipped. Error: {e}")
                # except:
                # 	print(f"Can't download {post}, skipped")
            elif "text" in post:
                articles.append(
                    {
                        "content": post["text"],
                        "title": post["title"],
                        "url": f"https://news.ycombinator.com/item?id={id}",
                    }
                )

        return articles

    def fetch_content(self, url: str) -> str:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, "html.parser")
        return soup.get_text()


def summarize_articles(articles: List[dict]) -> str:
    prompt_template = """
	You will be provided a few of the top posts in HackerNews, followed by their URL.
	For each post, provide a brief summary followed by the URL the full post can be found at.

	Posts:
	{article_summaries}
	"""

    # Prepare the article summaries
    article_summaries = "\n\n".join(
        [
            f"Content: {article['content']}\nURL: {article['url']}"
            for article in articles
        ]
    )

    prompt = PromptTemplate(
        input_variables=["article_summaries"], template=prompt_template
    )

    llm = ChatOllama(
        model="llama3.1:8b",
        temperature=0,
    )

    output_parser = StrOutputParser()

    chain = prompt | llm | output_parser

    output = chain.invoke(
        {
            "article_summaries": article_summaries,
        }
    )
    return output

In [1]:
from typing import List
from langchain.prompts import PromptTemplate
from langchain_ollama import ChatOllama
from langchain.schema import StrOutputParser
from typing import List
import requests

from bs4 import BeautifulSoup
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_ollama import ChatOllama
from langchain.schema import StrOutputParser


class HackernewsNewestFetcher:
    def __init__(self):
        self.base_url = "https://hacker-news.firebaseio.com/v0"

    def fetch_articles(self, top_k: int) -> List[dict]:
        response = requests.get(f"{self.base_url}/topstories.json?print=pretty")
        trending_list = response.json()
        articles = []

        for id in trending_list[:top_k]:
            post = requests.get(f"{self.base_url}/item/{id}.json?print=pretty").json()
            if "url" in post:
                try:
                    content = self.fetch_content(post["url"])
                    articles.append(
                        {"content": content, "title": post["title"], "url": post["url"]}
                    )

                except requests.RequestException as e:
                    print(f"Can't download {post}, skipped. Error: {e}")
                # except:
                # 	print(f"Can't download {post}, skipped")
            elif "text" in post:
                articles.append(
                    {
                        "content": post["text"],
                        "title": post["title"],
                        "url": f"https://news.ycombinator.com/item?id={id}",
                    }
                )

        return articles

    def fetch_content(self, url: str) -> str:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, "html.parser")
        return soup.get_text()


def summarize_articles(articles: List[dict]) -> str:
    prompt_template = """
    Summarize the following article from HackerNews. Provide a brief summary followed by the URL the full post can be found at.

    Article:
    Title: {title}
    Content: {content}
    URL: {url}

    Summary:
    """

    prompt = PromptTemplate(
        input_variables=["title", "content", "url"], template=prompt_template
    )

    llm = ChatOllama(
        model="llama3.1:8b",
        temperature=0,
    )

    chain = (prompt | llm | StrOutputParser()).invoke

    summaries = []
    for article in articles:
        summary = chain(
            {
                "title": article["title"],
                "content": article["content"],
                "url": article["url"],
            }
        )
        summaries.append(summary)

    return "\n\n".join(summaries)


# Main execution
fetcher = HackernewsNewestFetcher()
articles = fetcher.fetch_articles(top_k=2)
summaries = summarize_articles(articles)
print(summaries)

Here is a brief summary of the article:

**Summary:** This is an open-ended question posted to Hacker News, asking users what projects or ideas they are currently working on.

**URL:** https://news.ycombinator.com/item?id=41342017

The text discusses the importance of thinking in graphs rather than lists when it comes to network security. It highlights how attackers can use visualizations and connections between systems to compromise a High Value Asset (HVA) by targeting dependent elements, such as terminal servers, admin accounts, and certificate authorities.

Key points:

* Attackers think in graphs, not lists, which gives them an advantage over defenders who rely on mental models or outdated diagrams.
* Compromising a terminal server can lead to compromising admin accounts on other machines, which can then be used to access the HVA.
* Local admin accounts with common passwords, file servers, print servers, and certificate authorities are all potential security dependencies that can 

In [1]:
from typing import List
from langchain.prompts import PromptTemplate
from langchain_ollama import ChatOllama
from langchain.schema import StrOutputParser
import requests

from bs4 import BeautifulSoup


class HackerNewsFetcher:
    """A class to fetch articles from Hacker News."""

    def __init__(self):
        self.base_url = "https://hacker-news.firebaseio.com/v0"

    def _fetch_article(self, article_id: int) -> dict:
        """Fetch an article from the Hacker News API.

        Args:
            article_id (int): The ID of the article to fetch.

        Returns:
            dict: A dictionary containing the article's data.
        """
        try:
            post = requests.get(f"{self.base_url}/item/{article_id}.json").json()
            return post
        except requests.RequestException as e:
            print(f"Failed to fetch article {article_id}: {e}")
            return None

    def _fetch_content(self, url: str) -> str:
        """Fetch the content of a web page.

        Args:
            url (str): The URL of the web page.

        Returns:
            str: The content of the web page as text.
        """
        try:
            response = requests.get(url)
            soup = BeautifulSoup(response.content, "html.parser")
            return soup.get_text()
        except requests.RequestException as e:
            print(f"Failed to fetch content from {url}: {e}")
            return ""

    def fetch_articles(self, top_k: int) -> List[dict]:
        """Fetch the top K articles from Hacker News.

        Args:
            top_k (int): The number of articles to fetch.

        Returns:
            List[dict]: A list of dictionaries containing the article's data.
        """
        response = requests.get(f"{self.base_url}/topstories.json")
        trending_list = response.json()

        articles = []
        for id in trending_list[:top_k]:
            post = self._fetch_article(id)
            if post and "url" in post:
                content = self._fetch_content(post["url"])
                article_data = {
                    "content": content,
                    "title": post["title"],
                    "url": post["url"],
                }
                articles.append(article_data)

        return articles


class ArticleSummarizer:
    """A class to summarize articles fetched from Hacker News."""

    def __init__(self):
        self.prompt_template = """
            Summarize the following article from HackerNews. Provide a brief summary followed by the URL the full post can be found at.

            Article:
            Title: {title}
            Content: {content}
            URL: {url}

            Summary:
            """

    def summarize_articles(self, articles: List[dict]) -> str:
        """Summarize a list of articles fetched from Hacker News.

        Args:
            articles (List[dict]): A list of dictionaries containing the article's data.

        Returns:
            str: A string summarizing each article.
        """
        llm = ChatOllama(model="llama3.1:8b", temperature=0)
        prompt_template = PromptTemplate(
            input_variables=["title", "content", "url"], template=self.prompt_template
        )

        chain = (prompt_template | llm | StrOutputParser()).invoke

        summaries = []
        for article in articles:
            summary = chain(
                {
                    "title": article["title"],
                    "content": article["content"],
                    "url": article["url"],
                }
            )
            summaries.append(summary)

        return "\n\n".join(summaries)


# Main execution
fetcher = HackerNewsFetcher()
articles = fetcher.fetch_articles(top_k=2)
summarizer = ArticleSummarizer()
summaries = summarizer.summarize_articles(articles)
print(summaries)

Here is a brief summary of the article:

The article "Defenders think in lists, attackers think in graphs (2015)" suggests that defenders and attackers have different mindsets when it comes to security. Defenders tend to think in linear, list-based ways, while attackers think in non-linear, graph-based ways. This difference in thinking makes it easier for attackers to outmaneuver defenders.

URL: https://github.com/JohnLaTwC/Shared/blob/master/Defenders%20think%20in%20lists.%20Attackers%20think%20in%20graphs.%20As%20long%20as%20this%20is%20true%2C%20attackers%20win.md


In [None]:
articles

In [None]:
# Main execution
fetcher = HackernewsNewestFetcher()
articles = fetcher.fetch_articles(top_k=2)
summaries = summarize_articles(articles)
print(summaries)

In [None]:
def summarize_articles(articles: List[dict]) -> str:
    prompt_template = """
	Summarize the following article from HackerNews. Provide a brief summary followed by the URL the full post can be found at.

	Article:
	Title: {title}
	Content: {content}
	URL: {url}

	Summary:
	"""

    prompt = PromptTemplate(
        input_variables=["title", "content", "url"], template=prompt_template
    )

    llm = ChatOllama(
        model="llama3.1:8b",
        temperature=0,
    )

    # output_parser = StrOutputParser()

    # chain = prompt | llm | output_parser

    # output = chain.invoke(
    # 	{
    # 		"article_summaries":article_summaries,
    # 	}
    # )

    chain = (prompt | llm | StrOutputParser()).invoke

    summaries = []
    for article in articles:
        summary = chain(
            {
                "title": article["title"],
                "content": article["content"],
                "url": article["url"],
            }
        )
        summaries.append(summary)

    return "\n\n".join(summaries)


# Main execution
fetcher = HackernewsNewestFetcher()
articles = fetcher.fetch_articles(top_k=2)
summaries = summarize_articles(articles)
print(summaries)

### Web search

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent
from langchain.memory import ConversationBufferMemory
from langchain_community.tools import DuckDuckGoSearchRun
from langchain.schema import AgentAction, AgentFinish, Document

# Initialize Ollama embeddings and ChatOllama
embeddings = OllamaEmbeddings(model="llama3.1:8b")
# llm = ChatOllama(model="llama2")
llm = ChatOllama(
    model="llama3.1:8b",
    temperature=0,
)

# Create documents
documents = [
    Document(
        page_content="Retrievers: Retrieves relevant documents to a user query using keyword search or semantic search."
    ),
    Document(page_content="Embedders: Creates embeddings for text or documents."),
    Document(
        page_content="Generators: Use a number of model providers to generate answers or content based on a prompt"
    ),
    Document(
        page_content="File Converters: Converts different file types like TXT, Markdown, PDF, etc. into a Haystack Document type"
    ),
]

# Create vector store
faiss_db = FAISS.from_documents(documents, embeddings)

# Create RAG prompt template
rag_prompt_template = PromptTemplate(
    template="""
		Answer the following query given the documents.
		If the answer is not contained within the documents, reply with 'no_answer'
		Query: {query}
		Documents:
		{context}
	""",
    input_variables=["query", "context"],
)

# Create RAG chain
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=faiss_db.as_retriever(),
    chain_type_kwargs={"prompt": rag_prompt_template},
)

# Create websearch prompt template
websearch_prompt_template = PromptTemplate(
    template="""
		Answer the following query given the documents retrieved from the web.
		Your answer should indicate that your answer was generated from websearch.
		You can also reference the URLs that the answer was generated from
		Query: {query}
		Documents:
		{context}
	""",
    input_variables=["query", "context"],
)

# Create websearch tool
search = DuckDuckGoSearchRun()
websearch_tool = Tool(
    name="Web Search",
    func=search.run,
    description="Useful for when you need to answer questions about current events or the current state of the world",
)


# Create custom output parser
class CustomOutputParser:
    def parse(self, text: str) -> AgentAction | AgentFinish:
        if "no_answer" in text.lower():
            return AgentAction(tool="Web Search", tool_input=text, log=text)
        return AgentFinish(return_values={"output": text}, log=text)


# Create agent prompt
agent_prompt = PromptTemplate(
    template="""
    	Answer the following question:
		{query}
		If you don't know the answer, respond with 'no_answer'.
	""",
    input_variables=["query"],
)

# Create LLM chain for the agent
llm_chain = rag_chain

# Create the agent
agent = LLMSingleActionAgent(
    llm_chain=llm_chain,
    output_parser=CustomOutputParser(),
    stop=["\nObservation:"],
    allowed_tools=["Web Search"],
)

# Create tools
tools = [websearch_tool]

# Create the agent executor
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    memory=ConversationBufferMemory(memory_key="chat_history"),
)

# Test the pipeline
print("Query: What is a retriever for?")
result = agent_executor.run("What is a retriever for?")
print(f"Result: {result}\n")

print("Query: What Mistral components does Langchain have?")
result = agent_executor.run("What Mistral components does Langchain have?")
print(f"Result: {result}")