In [1]:
from typing import Type, List
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool, DuckDuckGoSearchResults
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import WikipediaAPIWrapper
from langchain.document_loaders import WebBaseLoader
from pathlib import Path

# wikipedia search tool
class WikipediaSearchToolArgsSchema(BaseModel):
    # Input schema for WikipediaSearchTool: one required free-text `query`.
    # The description text is passed to Field() unchanged.
    query: str = Field(
        ...,
        description="The query you will search for. Example query: Research about the XZ backdoor",
    )

class WikipediaSearchTool(BaseTool):
    # Agent tool that forwards a query to WikipediaAPIWrapper and returns
    # whatever the wrapper's run() produces.
    name: str = "WikipediaSearchTool"
    description: str = """
    Use this tool to find the information about a topic from Wikipedia.
    It takes a query as an argument.

    """
    args_schema: Type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema

    def _run(self, query: str):
        """Run `query` through a freshly created WikipediaAPIWrapper."""
        return WikipediaAPIWrapper().run(query)

# DuckDuckGo search tool: finds URLs related to a topic
class DuckDuckGoSearchToolArgsSchema(BaseModel):
    # Input schema for DuckDuckGoSearchTool: one required free-text `query`.
    # The description text is passed to Field() unchanged.
    query: str = Field(
        ...,
        description="The query you will search for. Example query: Research about the XZ backdoor",
    )

class DuckDuckGoSearchTool(BaseTool):
    # Agent tool that forwards a query to DuckDuckGoSearchResults and returns
    # whatever its run() produces. The tool declares no configuration fields
    # of its own; the search backend is always built with default arguments.
    name: str = "DuckDuckGoSearchTool"
    description: str = """
    Use this tool to find the set of URLs that contain information about a topic.
    It takes a query as an argument.
    
    """
    args_schema: Type[DuckDuckGoSearchToolArgsSchema] = DuckDuckGoSearchToolArgsSchema

    def _run(self, query: str):
        """Run `query` through a freshly created DuckDuckGoSearchResults."""
        return DuckDuckGoSearchResults().run(query)

# Web page reader tool: loads the contents of URLs with WebBaseLoader
class WebLoaderToolArgsSchema(BaseModel):
    # Input schema for WebLoaderTool: one required list of URL strings.
    # The description text is passed to Field() unchanged.
    list_of_urls: List[str] = Field(
        ...,
        description="a list of urls you will search for. Examples: ['https://www.example.com/', 'https://google.com']",
    )

class WebLoaderTool(BaseTool):
    # Agent tool that loads the given URLs with WebBaseLoader and returns the
    # text of all loaded documents as a single string.
    name: str = "WebLoaderTool"
    description: str = """
    Use this tool to get site's contents of URL that contains information about a topic.
    It takes list of urls as an argument.
    
    """
    args_schema: Type[WebLoaderToolArgsSchema] = WebLoaderToolArgsSchema

    def _run(self, list_of_urls: List[str]) -> str:
        """Load every URL and return the combined page text.

        Args:
            list_of_urls: URLs handed to WebBaseLoader.

        Returns:
            The text of each loaded document with runs of whitespace collapsed
            to single spaces; documents are separated by one blank line.
        """
        documents = WebBaseLoader(list_of_urls).load()
        # The earlier replace("\\n", "") looked for a literal backslash followed
        # by "n", so real newlines were never touched. Splitting on whitespace
        # and re-joining removes the newline/indent noise without gluing
        # adjacent words together.
        return "\n\n".join(
            " ".join(doc.page_content.split()) for doc in documents
        )

# agent
# Chat model that drives the agent.
llm = ChatOpenAI(temperature=0.1, model_name="gpt-4o-mini")

# Tools the agent may call.
# DuckDuckGoSearchTool declares no `output_format` field and its _run() builds
# DuckDuckGoSearchResults() with no arguments, so the `output_format="list"`
# keyword previously passed here never reached the search backend. It is
# dropped; to change the result format, forward the option inside the tool.
tools = [
    DuckDuckGoSearchTool(),
    WebLoaderTool(),
    WikipediaSearchTool(),
]

# OpenAI-functions agent with verbose tracing enabled.
agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    tools=tools,
)

# writer
def write_output(output, output_dir="./output", filename="output.txt"):
    """Save the "output" entry of an agent response as a UTF-8 file.

    Args:
        output: Mapping whose "output" entry holds the text to save.
        output_dir: Directory to write into; created (with parents) if missing.
        filename: Name of the file created inside ``output_dir``.

    Raises:
        ValueError: If ``output`` has no "output" entry or the entry is None.
    """
    text = output.get("output")
    if text is None:
        # Fail with a clear message before touching the filesystem, instead of
        # an AttributeError from calling .encode() on None.
        raise ValueError('response has no "output" entry to write')

    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    # Write encoded bytes so no platform newline translation is applied.
    (target_dir / filename).write_bytes(text.encode("utf-8"))

# prompt
keyword = "XZ backdoor"
# Build the instruction first so the invoke call stays short; the two adjacent
# f-strings concatenate into exactly one prompt string.
prompt = (
    f"Research about the {keyword} by searching on Wikipedia and DuckDuckGo. "
    f"Then read the contents of the first 3 URLs that contain information about the {keyword}."
)
response = agent.invoke(prompt)

# Persist the agent's answer to disk.
write_output(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearchTool` with `{'query': 'XZ backdoor'}`


[0m[38;5;200m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution capabilities on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 