In [7]:
import os
import requests
from typing import Type
from langchain.tools import BaseTool
from langchain.agents import initialize_agent, AgentType
from pydantic import BaseModel, Field
from langchain.chat_models import ChatOpenAI
from langchain.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper
from langchain.schema import SystemMessage

In [9]:
# Chat model handed to the agent below; configured with temperature 0.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
)


class SearchToolSchema(BaseModel):
    # Argument schema shared by the two search tools: one required
    # free-text query string.
    query: str = Field(
        ...,
        description="The query to search for",
    )


class DuckDuckGoSearchTool(BaseTool):
    # Agent tool that forwards a query to DuckDuckGoSearchAPIWrapper.
    name: str = "DuckDuckGoSearchTool"
    args_schema: Type[SearchToolSchema] = SearchToolSchema
    description: str = """
    Use this tool to find the relevant information in the DuckDuckGo search.
    If you find the relevant page, pass the page url to the ScrapeTool.
    """

    def _run(self, query: str) -> str:
        """Run *query* through a fresh DuckDuckGo wrapper and return its result."""
        search_client = DuckDuckGoSearchAPIWrapper()
        search_result = search_client.run(query)
        return search_result


class WikipediaSearchTool(BaseTool):
    # Agent tool that forwards a query to WikipediaAPIWrapper.
    name: str = "WikipediaSearchTool"
    args_schema: Type[SearchToolSchema] = SearchToolSchema
    description: str = """
    Use this tool when you cannot find relevant information in the DuckDuckGoSearchTool. Pass the information to the SaveTool.
    """

    def _run(self, query: str) -> str:
        """Run *query* through a fresh Wikipedia wrapper and return its result."""
        wiki_client = WikipediaAPIWrapper()
        wiki_result = wiki_client.run(query)
        return wiki_result


class ScrapeToolSchema(BaseModel):
    # Argument schema for ScrapeTool: one required URL string.
    url: str = Field(
        ...,
        description="The url to scrape",
    )


class ScrapeTool(BaseTool):
    # Agent tool that downloads a page and stores the response text in a
    # local .txt file whose name is derived from the URL.
    name: str = "ScrapeTool"
    args_schema: Type[ScrapeToolSchema] = ScrapeToolSchema
    description: str = """
    Use this tool to scrape the information from the page.
    """

    def _run(self, url: str) -> str:
        """Download *url* and write the response text to a text file.

        Args:
            url: Address of the page to fetch.

        Returns:
            The path of the ``.txt`` file the response text was written to.

        Raises:
            requests.RequestException: If the request fails or times out.
        """
        # Local import keeps this block self-contained without touching the
        # file-level import list.
        import re

        # Fetch before opening the file so a failed request does not leave
        # an empty file behind; the timeout prevents an indefinite hang.
        response = requests.get(url, timeout=30)

        # A raw URL contains "/" and ":" and therefore is not a usable file
        # name; collapse every unsafe run of characters into "_" and cap the
        # length to stay under common file-name limits.
        safe_name = re.sub(r"[^A-Za-z0-9._-]+", "_", url).strip("_")[:200]
        if not safe_name:
            safe_name = "page"
        file_path = f"{safe_name}.txt"

        # Explicit UTF-8 so pages with non-ASCII text can be written
        # regardless of the platform's default encoding.
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(response.text)
        return file_path


class SaveToolSchema(BaseModel):
    # Argument schema for SaveTool: one required string holding the text
    # to write.
    content: str = Field(
        ...,
        description="The content to save",
    )


class SaveTool(BaseTool):
    # Agent tool that writes the given text to output.txt in the current
    # working directory, replacing any previous contents.
    name: str = "SaveTool"
    args_schema: Type[SaveToolSchema] = SaveToolSchema
    description: str = """
    Use this tool to save the information to the file.
    """

    def _run(self, content: str) -> str:
        """Write *content* to ``output.txt`` and return a confirmation message.

        Args:
            content: Text to store in the file.

        Returns:
            The fixed confirmation string ``"Result saved to output.txt"``.
        """
        # Explicit UTF-8: the text may contain non-ASCII characters, which
        # would fail to encode under a non-UTF-8 platform default.
        with open("output.txt", "w", encoding="utf-8") as f:
            f.write(content)
        return "Result saved to output.txt"


# One instance of every tool the agent is allowed to call.
tools = [DuckDuckGoSearchTool(), WikipediaSearchTool(), ScrapeTool(), SaveTool()]

# System prompt for the agent. The literal (including its leading
# whitespace) is kept exactly as it was originally written.
researcher_prompt = SystemMessage(content="""
                                 You are a researcher.
                                 You research about the given query and save the information to the txt file.
                                 """)

# OpenAI-functions agent wired to the shared llm and the tools above.
agent = initialize_agent(
    llm=llm,
    tools=tools,
    agent=AgentType.OPENAI_FUNCTIONS,
    agent_kwargs={"system_message": researcher_prompt},
    verbose=True,
)

# Run a single research request and print whatever the agent returns.
research_result = agent.invoke("Research about the XZ backdoor")
print(research_result)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution capabilities on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2024