In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.tools import BaseTool, DuckDuckGoSearchResults
from langchain.document_loaders import WikipediaLoader
from pydantic import BaseModel, Field
from typing import Type
import os

# Chat model handed to the research agent built further down in this cell.
llm = ChatOpenAI(temperature=0.1, model="gpt-4o-mini")


# Argument schema for ResearchTool: declares the single string input the
# agent has to supply when it calls the tool.
# NOTE(review): documented with comments instead of a class docstring on
# purpose; pydantic is expected to surface a model docstring in the generated
# schema, which would alter what is sent alongside the tool — confirm before
# converting this to a docstring.
class ResearchToolArgsSchema(BaseModel):
    # Search text; ResearchTool._run passes it unchanged to both the
    # Wikipedia loader and the DuckDuckGo search.
    query: str = Field(
        description="The query you will search for in Wikipedia or DuckDuckGo. Example query: Research about the BPFDoor."
    )


# Custom LangChain tool that gives the agent one entry point for looking a
# query up on both Wikipedia and DuckDuckGo.
# NOTE(review): the description below tells the model the tool will "enter
# the website and extract the contents", but _run only returns the DuckDuckGo
# search results. The text is left unchanged because it is part of the prompt
# sent to the model — confirm whether page fetching was intended.
class ResearchTool(BaseTool):
    # Explicit annotations match args_schema below; pydantic-v2-based BaseTool
    # versions reject overrides of inherited fields that lack an annotation.
    name: str = "ResearchTool"
    description: str = """
    Use this tool to Search query in Wikipedia or DuckDuckGo.
    If it finds a website in DuckDuckGo, It should enter the website and extract the contents
    It takes a query as an argument.
    """
    args_schema: Type[ResearchToolArgsSchema] = ResearchToolArgsSchema

    def _run(self, query: str):
        """Search Wikipedia and DuckDuckGo for ``query``.

        Args:
            query: Text to search for; passed unchanged to both sources.

        Returns:
            A dict with two string entries:

            * ``"wikipedia_result"`` - page contents of up to two Wikipedia
              documents joined by blank lines, ``"No Wikipedia results
              found."`` when nothing matched, or an error message.
            * ``"ddg_result"`` - whatever ``DuckDuckGoSearchResults.run``
              returned, or an error message.

        Errors are reported inside the returned dict instead of being raised,
        so the agent still receives an observation it can react to.
        """
        # Each source is queried in its own try block so that a failure in
        # one of them does not throw away (or prevent) the other's result.
        try:
            wikipedia_docs = WikipediaLoader(query=query, load_max_docs=2).load()
        except Exception as e:
            wikipedia_content = f"Error during Wikipedia research: {str(e)}"
        else:
            if wikipedia_docs:
                wikipedia_content = "\n\n".join(
                    doc.page_content for doc in wikipedia_docs
                )
            else:
                wikipedia_content = "No Wikipedia results found."

        try:
            duckduckgo_result = DuckDuckGoSearchResults(max_results=5).run(query)
        except Exception as e:
            duckduckgo_result = f"Error during DuckDuckGo research: {str(e)}"

        return {
            "wikipedia_result": wikipedia_content,
            "ddg_result": duckduckgo_result,
        }


def createFile(query, result):
    """Save an agent result as a UTF-8 text file under ``./agent_result``.

    Args:
        query: Search topic. Its sanitised form is used as the file name
            stem, so the file always lands directly inside the directory.
        result: Agent output. For a dict the ``'output'`` entry is written,
            falling back to the string form of the whole dict when that key
            is absent; any other value is written as ``str(result)``.

    Returns:
        None. Success or failure is reported with ``print``; an ``OSError``
        raised while creating the directory or writing the file is caught
        and reported rather than propagated.
    """
    directory_path = './agent_result'

    # Keep letters, digits, spaces and "-_." as they are; every other
    # character (path separators, ":" or "?", ...) becomes "_". Leading and
    # trailing dots/spaces are stripped so ".." cannot survive, and an empty
    # result falls back to a fixed stem.
    safe_name = "".join(
        ch if ch.isalnum() or ch in " -_." else "_" for ch in str(query)
    ).strip(" .") or "result"
    file_path = f"{directory_path}/{safe_name}.txt"

    if isinstance(result, dict):
        result_text = str(result.get('output', result))
    else:
        result_text = str(result)

    try:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(directory_path, exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(result_text)
    except OSError as e:
        print(f"Error: Failed to create file. {str(e)}")
    else:
        print(f"Results saved to {file_path}")


# Build an OpenAI-functions agent whose only tool is ResearchTool.
research_agent = initialize_agent(
    tools=[ResearchTool()],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    verbose=True,
)

# Topic to research; also passed to createFile together with the result.
query = "XZ backdoor"
prompt = f"Research about the {query} and provide a comprehensive summary of what it is, how it works, and its impact."

# Run the agent on the prompt, then hand its result to createFile.
print(f"Starting research on: {query}")
result = research_agent.invoke({"input": prompt})
createFile(query, result)

print("Research completed successfully!")

Starting research on: XZ backdoor


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `ResearchTool` with `{'query': 'XZ backdoor'}`


[0m

  with DDGS() as ddgs:
  results = ddgs.text(


[36;1m[1;3m{'wikipedia_result': 'In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.\nWhile xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2024.\n\n\n== Background ==\nMicrosoft employee and PostgreSQL developer Andres Freund reported the backdoor after investigating a performance 