In [2]:
from typing import Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.retrievers import WikipediaRetriever
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.document_loaders import AsyncChromiumLoader
from langchain.document_transformers import Html2TextTransformer
from typing import List
from pathlib import Path

# Chat model handed to the agent below; built once at module level.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo-1106",
    temperature=0.1,
)


class WikipediaSearchToolArgsSchema(BaseModel):
    # Argument schema referenced by WikipediaSearchTool.args_schema:
    # one free-text `query` string.
    query: str = Field(
        description=(
            "The query you will search for. "
            "Example query: Research about the Generative AI"
        ),
    )


class WikipediaSearchTool(BaseTool):
    # Agent tool that looks a query up through WikipediaRetriever.
    name = "WikipediaSearchTool"
    description = """
    Use this tool to find the research.
    It takes a query as an argument.
    """
    args_schema: Type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema

    def _run(self, query):
        """Return the page content of each document retrieved for `query`."""
        # The retriever is configured with top_k_results=5.
        documents = WikipediaRetriever(top_k_results=5).get_relevant_documents(query)
        page_contents = []
        for document in documents:
            page_contents.append(document.page_content)
        return page_contents


class DuckDuckGoSearchToolArgsSchema(BaseModel):
    # Argument schema referenced by DuckDuckGoSearchTool.args_schema:
    # one free-text `query` string.
    query: str = Field(
        description=(
            "The query you will search for. "
            "Example query: Research about the Generative AI"
        ),
    )


class DuckDuckGoSearchTool(BaseTool):
    # Agent tool that runs a DuckDuckGo search and returns the result URLs.
    name = "DuckDuckGoSearchTool"
    description = """
    Use this tool to find the research.
    It takes a query as an argument.
    It will return the website urls.
    """
    args_schema: Type[DuckDuckGoSearchToolArgsSchema] = DuckDuckGoSearchToolArgsSchema

    def _run(self, query: str) -> List[str]:
        """Return the "link" entry of each DuckDuckGo result for `query`.

        Args:
            query: Free-text search query.

        Returns:
            The "link" value of each result, requesting 5 results.
        """
        search = DuckDuckGoSearchAPIWrapper()
        # The result limit is passed positionally on purpose: the wrapper's
        # second parameter has been called both `num_results` and
        # `max_results` across LangChain releases, so the keyword form raises
        # TypeError on whichever release uses the other name.
        results = search.results(query, 5)
        return [result["link"] for result in results]


class WebsiteScrapToolArgsSchema(BaseModel):
    # Argument schema referenced by WebsiteScrapTool.args_schema:
    # the list of URLs to load.
    urls: List[str] = Field(
        description=(
            "The urls you will get. "
            "Example query: ['https://www.wikipedia.org', 'https://www.google.com']"
        ),
    )


class WebsiteScrapTool(BaseTool):
    # Agent tool that loads web pages and converts their HTML to text.
    name = "WebsiteScrapTool"
    description = """
    Use this tool to extract contents from websites.
    It takes urls as an argument.
    """
    args_schema: Type[WebsiteScrapToolArgsSchema] = WebsiteScrapToolArgsSchema

    def _run(self, urls):
        """Load `urls` with AsyncChromiumLoader and run Html2TextTransformer on them."""
        loaded_pages = AsyncChromiumLoader(urls).load()
        # NOTE(review): the transformer's output is returned as-is; it looks
        # like these are document objects rather than plain strings (unlike
        # WikipediaSearchTool, which returns page_content) - confirm intended.
        return Html2TextTransformer().transform_documents(loaded_pages)


class SaveTextFileToolArgsSchema(BaseModel):
    # Argument schema referenced by SaveTextFileTool.args_schema:
    # the list of text documents to write.
    docs: List[str] = Field(
        description="The docs you want save to text file.",
    )


class SaveTextFileTool(BaseTool):
    # Agent tool that writes the given documents to ./files/output.txt.
    name = "SaveTextFileTool"
    description = """
    Use this tool to save text file.
    It takes docs as an argument.
    """
    args_schema: Type[SaveTextFileToolArgsSchema] = SaveTextFileToolArgsSchema

    def _run(self, docs: List[str]) -> str:
        """Write `docs` to ./files/output.txt and report what was saved.

        The ./files directory is created if missing and any existing
        output.txt is overwritten.

        Args:
            docs: Text of each document; each one is written followed by a
                blank line.

        Returns:
            A short confirmation message, so the caller gets an explicit
            result instead of None.
        """
        output_dir = Path("./files")
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / "output.txt"
        # Explicit UTF-8: without it the platform default encoding is used,
        # and non-ASCII text can raise UnicodeEncodeError on some locales.
        with output_path.open("w", encoding="utf-8") as f:
            f.writelines(doc + "\n\n" for doc in docs)
        return f"Saved {len(docs)} document(s) to {output_path}"


# Build an OpenAI-functions agent over the four tools defined in this file.
agent = initialize_agent(
    tools=[
        WikipediaSearchTool(),
        DuckDuckGoSearchTool(),
        WebsiteScrapTool(),
        SaveTextFileTool(),
    ],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
    handle_parsing_errors=True,
)

# Research task handed to the agent.
prompt = (
    "Research about the XZ backdoor. "
    "And save the found document as a text file."
)

# Kept as a bare trailing expression so a notebook cell displays the result.
agent.invoke(prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearchTool` with `{'query': 'XZ backdoor'}`


[0m



  lis = BeautifulSoup(html).find_all('li')


[36;1m[1;3m['In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution capabilities on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.\nWhile xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2024.\n\n\n== Background ==\nMicrosoft employee and PostgreSQL developer Andres Freund reported the backdoor after investigating a performance regression in Debian Si

{'input': 'Research about the XZ backdoor. And save the found document as a text file.',
 'output': 'The document about the XZ backdoor has been saved as a text file. Is there anything else you would like to do?'}