In [62]:
from typing import Any, Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.retrievers import WikipediaRetriever
import requests
from bs4 import BeautifulSoup
from langchain.schema import SystemMessage

# Chat model instance passed to initialize_agent below; constructed with
# temperature=0.1.
llm = ChatOpenAI(temperature=0.1)


def format_docs(docs):
    """Merge a sequence of documents into a single string.

    Args:
        docs: Iterable of objects that expose a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank line.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


# Argument schema used by the DuckDuckGo and Wikipedia tools in this file:
# one string field holding the search query.
class ResearchToolArgsSchema(BaseModel):
    # Text to search for.
    query: str = Field(description="The query you will search for.")


class DuckDuckGoReearchTool(BaseTool):
    """Agent tool that runs a query through the DuckDuckGo search wrapper."""

    name = "DuckDuckGoReearchTool"
    description = """
    Use this tool to find the information about a query using DuckDuckGo.
    
    """
    args_schema: Type[ResearchToolArgsSchema] = ResearchToolArgsSchema

    def _run(self, query):
        """Search DuckDuckGo for ``query`` and return the wrapper's result."""
        search_client = DuckDuckGoSearchAPIWrapper()
        search_output = search_client.run(query)
        return search_output


class WikipediaResearchTool(BaseTool):
    """Agent tool that looks a query up through the Wikipedia retriever."""

    name = "WikipediaResearchTool"
    description = """
    Use this tool to find the information about a query using Wikipedia.
    
    """
    args_schema: Type[ResearchToolArgsSchema] = ResearchToolArgsSchema

    def _run(self, query):
        """Retrieve documents for ``query`` and return them as one string.

        The retriever is built with ``top_k_results=5`` and ``lang="en"``;
        the retrieved documents are merged by ``format_docs``.
        """
        wiki = WikipediaRetriever(top_k_results=5, lang="en")
        return format_docs(wiki.get_relevant_documents(query))


# Argument schema for WebSiteContentExtracTool: the single field is named
# ``query`` but, per its description, carries the URL of the page to read.
class WebSiteContentExtracToolArgsSchema(BaseModel):
    query: str = Field(
        description="The url of the website to extract the content."
    )


class WebSiteContentExtracTool(BaseTool):
    """Agent tool that downloads a web page and returns its paragraph text."""

    name = "WebSiteContentExtracTool"
    description = """
    Use this tool to extract the content of a website.
    
    """
    args_schema: Type[WebSiteContentExtracToolArgsSchema] = (
        WebSiteContentExtracToolArgsSchema
    )

    def _run(self, query):
        """Fetch the page at ``query`` and return the text of its ``<p>`` tags.

        Args:
            query: URL of the page to download (the schema names the field
                ``query`` even though it carries a URL).

        Returns:
            The text of every ``<p>`` element joined by single spaces, or a
            short error message when the request itself fails.
        """
        url = query
        try:
            # Without a timeout, requests can wait indefinitely on an
            # unresponsive host and stall the whole agent run.
            response = requests.get(url, timeout=10)
        except requests.RequestException as error:
            # Report the failure as the tool's result so the agent can carry
            # on with its other tools instead of aborting.
            return f"Failed to fetch {url}: {error}"
        soup = BeautifulSoup(response.text, "html.parser")
        # Extract the text of every <p> tag and join the pieces.
        content = " ".join(p.text for p in soup.find_all("p"))
        print(content)
        return content


# Argument schema for SaveToFileTool: the single field is named ``query`` but,
# per its description, carries the text that will be written to the file.
class SaveToFileToolArgsSchema(BaseModel):
    query: str = Field(
        description="The research result contents to save in a file."
    )


class SaveToFileTool(BaseTool):
    """Agent tool that writes the given text to ``result.txt``."""

    name = "SaveToFileTool"
    description = """
    Use this tool to save the information in a file.
    
    """
    args_schema: Type[SaveToFileToolArgsSchema] = SaveToFileToolArgsSchema

    def _run(self, query):
        """Write ``query`` to ``result.txt`` and return a confirmation message.

        Args:
            query: Text to store; any existing ``result.txt`` is overwritten
                because the file is opened in ``"w"`` mode.

        Returns:
            A fixed confirmation string.
        """
        # Explicit UTF-8 so non-ASCII text in the research results does not
        # fail on platforms whose default encoding cannot represent it.
        with open("result.txt", "w", encoding="utf-8") as f:
            f.write(query)
        return "Information saved in result.txt file."

# Tools the agent is allowed to call.
research_tools = [
    DuckDuckGoReearchTool(),
    WikipediaResearchTool(),
    WebSiteContentExtracTool(),
    SaveToFileTool(),
]

# System message listing the research steps the model is asked to follow.
research_instructions = SystemMessage(
    content="""
            You are a research specialist.
            Take a deep breath and proceed with the following task.
            1. Search for the information about a query using DuckDuckGo.
            2. Search for the information about a query using Wikipedia.
            3. Extract the content of a website if any url is included in the search result.
            4. Save all the search results in a text file, and it should contain all the information you have found in step 1, 2, and 3.
            """
)

# OpenAI-functions agent wired to the shared chat model and the tools above.
agent = initialize_agent(
    tools=research_tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    agent_kwargs={"system_message": research_instructions},
    handle_parsing_errors=True,
    verbose=True,
)


prompt = "Research about the XZ backdoor"

# Kept as the final expression so the notebook displays the returned dict.
agent.invoke(prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DuckDuckGoReearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mThe version of XZ should be carefully checked, as versions 5.6.0 and 5.6.1 contain the backdoor. It is advised to revert to a previous known safe version of XZ Utils, such as 5.4. Software supply ... What Does the Backdoor Do? Malicious code added to xz Utils versions 5.6.0 and 5.6.1 modified the way the software functions. The backdoor manipulated sshd, the executable file used to make remote ... On March 28, 2024 a backdoor was identified in XZ Utils. This vulnerability, CVE-2024-3094 with a CVSS score of 10 is a result of a software supply chain compromise impacting versions 5.6.0 and 5.6.1 of XZ Utils. The U.S. Cybersecurity and Infrastructure Security Agency (CISA) has recommended organizations to downgrade to a previous non-compromised XZ Utils version. That operation matches the style of the XZ Utils backdoor far more than the cruder 

{'input': 'Research about the XZ backdoor',
 'output': 'I have researched about the XZ backdoor. Here is the information I found:\n\n### XZ Backdoor:\nThe version of XZ should be carefully checked, as versions 5.6.0 and 5.6.1 contain the backdoor. It is advised to revert to a previous known safe version of XZ Utils, such as 5.4. Software supply ... What Does the Backdoor Do? Malicious code added to xz Utils versions 5.6.0 and 5.6.1 modified the way the software functions. The backdoor manipulated sshd, the executable file used to make remote ... On March 28, 2024 a backdoor was identified in XZ Utils. This vulnerability, CVE-2024-3094 with a CVSS score of 10 is a result of a software supply chain compromise impacting versions 5.6.0 and 5.6.1 of XZ Utils. The U.S. Cybersecurity and Infrastructure Security Agency (CISA) has recommended organizations to downgrade to a previous non-compromised XZ Utils version. That operation matches the style of the XZ Utils backdoor far more than the cru