In [2]:
import os
import requests
from typing import Type
from langchain.tools import BaseTool
from langchain.agents import initialize_agent, AgentType
from pydantic import BaseModel, Field
from langchain.chat_models import ChatOpenAI
from langchain.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper
from langchain.schema import SystemMessage

In [6]:
# Chat model that drives the agent built further down; temperature=0 keeps
# sampling randomness to a minimum so runs are easier to compare.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
)


class SearchToolSchema(BaseModel):
    # Argument schema for the search tools: one required free-text query.
    # (Kept as a comment rather than a docstring so the generated JSON schema
    # is not altered.)
    query: str = Field(
        ...,
        description="The query to search for",
    )


class DuckDuckGoSearchTool(BaseTool):
    # Web-search tool: queries DuckDuckGo and gives the agent a short
    # plain-text digest (title, link, snippet) of the first few hits.
    name: str = "DuckDuckGoSearchTool"
    args_schema: Type[SearchToolSchema] = SearchToolSchema
    description: str = """
    Use this tool to find the relevant information in the DuckDuckGo search.
    If you find the relevant page, pass the page url to the ScrapeTool.
    """
    # Upper bound on the number of hits returned to the agent.
    max_results: int = 3

    def _run(self, query: str) -> str:
        """Search DuckDuckGo for ``query`` and format the first hits as text.

        Args:
            query: Free-text search string supplied by the agent.

        Returns:
            One "Title / Link / Snippet" paragraph per hit, separated by blank
            lines. When the search yields nothing, an explicit "no results"
            message is returned instead of an empty string so the agent has a
            clear signal to try another tool.
        """
        from itertools import islice

        from duckduckgo_search import DDGS

        # Browser-like User-Agent sent with the search request.
        browser_headers = {
            "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
        }

        with DDGS(headers=browser_headers) as ddgs:
            # max_results is intentionally not passed to ddgs.text(); the cap
            # is applied here by consuming at most `self.max_results` items.
            results = list(
                islice(ddgs.text(query, region='wt-wt'), self.max_results))

        if not results:
            return f"No results found for query: {query}"

        # .get() keeps a hit with a missing field from aborting the whole run.
        return "\n".join(
            f"Title: {r.get('title', '')}\nLink: {r.get('href', '')}\nSnippet: {r.get('body', '')}\n"
            for r in results)


class WikipediaSearchTool(BaseTool):
    # Fallback search tool that delegates to LangChain's WikipediaAPIWrapper.
    name: str = "WikipediaSearchTool"
    args_schema: Type[SearchToolSchema] = SearchToolSchema
    description: str = """
    Use this tool when you cannot find relevant information in the DuckDuckGoSearchTool. Pass the information to the SaveTool.
    """

    def _run(self, query: str) -> str:
        """Run ``query`` through a freshly built WikipediaAPIWrapper and return its output."""
        wikipedia = WikipediaAPIWrapper()
        answer = wikipedia.run(query)
        return answer


class ScrapeToolSchema(BaseModel):
    # Argument schema for ScrapeTool: the single required page address.
    url: str = Field(
        ...,
        description="The url to scrape",
    )


class ScrapeTool(BaseTool):
    # Page-fetching tool: downloads a URL and returns the response body as text.
    name: str = "ScrapeTool"
    args_schema: Type[ScrapeToolSchema] = ScrapeToolSchema
    description: str = """
    Use this tool to scrape the information from the page.
    """
    # Seconds to wait on the connection / response before giving up; without
    # a timeout `requests.get` can block indefinitely on an unresponsive host.
    request_timeout: float = 10.0

    def _run(self, url: str) -> str:
        """Fetch ``url`` and return the response body.

        Args:
            url: Address of the page to download.

        Returns:
            The decoded response text on success. On a network failure,
            timeout, or HTTP error status, a short "Failed to fetch ..."
            message is returned instead of raising, so the agent can react
            (for example by trying a different source).
        """
        try:
            response = requests.get(url, timeout=self.request_timeout)
            # Treat 4xx/5xx as failures rather than returning an error page
            # as if it were the requested content.
            response.raise_for_status()
        except requests.RequestException as exc:
            return f"Failed to fetch {url}: {exc}"
        return response.text


class SaveToolSchema(BaseModel):
    # Argument schema for SaveTool: the single required text payload.
    content: str = Field(
        ...,
        description="The content to save",
    )


class SaveTool(BaseTool):
    # File-writing tool: persists the agent's findings to ``output.txt``.
    name: str = "SaveTool"
    args_schema: Type[SaveToolSchema] = SaveToolSchema
    description: str = """
    Use this tool to save the information to the file.
    """

    def _run(self, content: str) -> str:
        """Write ``content`` to ``output.txt``, replacing any previous file.

        Args:
            content: Text to store.

        Returns:
            A confirmation message naming the file that was written.
        """
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent non-ASCII characters found in scraped or searched text.
        with open("output.txt", "w", encoding="utf-8") as f:
            f.write(content)
        return "Result saved to output.txt"


# Toolbox handed to the agent: two search tools, a page fetcher and a file writer.
tools = [DuckDuckGoSearchTool(), WikipediaSearchTool(), ScrapeTool(), SaveTool()]

# Assemble an OpenAI-functions agent around the chat model and the tools
# defined above. The system message describes the research workflow the
# agent is asked to follow.
agent = initialize_agent(
    llm=llm,
    tools=tools,
    agent=AgentType.OPENAI_FUNCTIONS,
    agent_kwargs={
        "system_message": SystemMessage(
            content="""
                                 You are a researcher.
                                 You research about the given query and save the information to the txt file.
                                 You use the DuckDuckGoSearchTool to find the relevant information and return the first url to the ScrapeTool.
                                 If you cannot find the relevant information, use the WikipediaSearchTool.
                                 """
        )
    },
    verbose=True,  # echo each tool invocation and observation while running
)

# Run one research task and show the agent's final result.
print(agent.invoke("Research about the XZ backdoor"))




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DuckDuckGoSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mTitle: XZ Utils backdoor - Wikipedia
Link: https://en.wikipedia.org/wiki/XZ_Utils_backdoor
Snippet: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". [b] [4] The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution capabilities on the affected Linux system.The issue has been given the Common Vulnerabilities and ...

Title: The XZ Backdoor: Everything You Need to Know - WIRED
Link: https://www.wired.com/story/xz-backdoor-everything-you-need-to-know/
Snippet: What Does the Backdoor Do? Malicious code added to xz Utils versions 5.6.0 and 5.6.1 modified the way the software functions. The backdoor manipulated sshd, the executable file used to make remote ...

Title: FAQ on t