In [1]:
import asyncio
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.tools import BaseTool
from langchain.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper
from pydantic import Field
from playwright.async_api import async_playwright

# Chat model used by the agent defined further down (temperature fixed at 0.1).
llm = ChatOpenAI(
    temperature=0.1,
)

# Tool for Wikipedia search
class WikipediaSearchTool(BaseTool):
    """LangChain tool that forwards a query string to ``WikipediaAPIWrapper``."""

    name: str = Field(default="WikipediaSearchTool")
    description: str = Field(default="Search Wikipedia for a given query.")

    def _run(self, query: str):
        """Run ``query`` through a newly created ``WikipediaAPIWrapper`` and return its result."""
        return WikipediaAPIWrapper().run(query)

# Tool for DuckDuckGo search
class DuckDuckGoSearchTool(BaseTool):
    """LangChain tool that forwards a query string to ``DuckDuckGoSearchAPIWrapper``."""

    name: str = Field(default="DuckDuckGoSearchTool")
    description: str = Field(default="Search DuckDuckGo for a given query.")

    def _run(self, query: str) -> str:
        """Run ``query`` through a newly created ``DuckDuckGoSearchAPIWrapper`` and return its result."""
        return DuckDuckGoSearchAPIWrapper().run(query)

# Tool to scrape website content from a URL using Playwright Async API
class WebsiteContentScraperTool(BaseTool):
    """Tool that opens a URL in headless Chromium and returns the page body's text.

    ``_run`` is a coroutine and must be awaited by the caller.
    """

    name: str = Field(default="WebsiteContentScraperTool")
    description: str = Field(default="Scrape and extract text content from a given URL using Playwright Async API.")

    # NOTE(review): `_run` is declared `async`, so a synchronous caller would
    # presumably receive an un-awaited coroutine rather than text. Confirm how
    # the agent invokes this tool before relying on it outside a direct `await`.
    async def _run(self, url: str) -> str:
        """Load ``url`` and return ``document.body.innerText``.

        Args:
            url: Address of the page to open.

        Returns:
            The text of the page body, or a string beginning with
            "Failed to scrape website content:" if any step raises.
        """
        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=True)
                try:
                    page = await browser.new_page()
                    await page.goto(url)
                    # Evaluate while the browser is still open, and await the
                    # call so a string (not a coroutine object) is returned.
                    return await page.evaluate("document.body.innerText")
                finally:
                    # Close the browser on success and on failure alike.
                    await browser.close()
        except Exception as e:
            return f"Failed to scrape website content: {e}"

# Tool to save research results to a .txt file
class SaveToFileTool(BaseTool):
    """Tool that writes research text to ``research_results.txt`` (UTF-8, overwriting)."""

    name: str = Field(default="SaveToFileTool")
    description: str = Field(default="Save the research content to a .txt file.")

    def _run(self, content: str):
        """Filter ``content`` and write it to disk, returning a status message.

        Lines that begin with "Web Content:" are dropped; every other line,
        including the lines that follow such a heading, is written unchanged.
        """
        # Guard against empty or whitespace-only content.
        if content.strip() == "":
            return "No content to save."
        try:
            kept_lines = []
            for line in content.split("\n"):
                if line.startswith("Web Content:"):
                    continue
                kept_lines.append(line)
            filtered_content = "\n".join(kept_lines)

            with open("research_results.txt", "w", encoding="utf-8") as out_file:
                out_file.write(filtered_content)
        except Exception as e:
            return f"Failed to save file: {e}"
        return "Research results saved to research_results.txt"

# Tools made available to the agent.
tools = [
    WikipediaSearchTool(),
    DuckDuckGoSearchTool(),
    WebsiteContentScraperTool(),
    SaveToFileTool(),
]

# Build the agent. The agent type is selected through the `agent` keyword of
# `initialize_agent` (the same keyword the later cell uses); `agent_type` is
# not one of its parameters.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

# Research topic used by the steps below.
query = "Research about the XZ backdoor"

# Async function to execute the agent steps
async def run_async_agent():
    """Run each research tool once for the module-level ``query`` and save the combined text.

    Calls the Wikipedia and DuckDuckGo tools, awaits the scraper on a fixed
    example URL, passes the combined report to ``SaveToFileTool`` and prints
    both search results plus the save status.
    """
    # Steps 1 and 2: the two search tools.
    wikipedia_results = WikipediaSearchTool()._run(query)
    ddg_results = DuckDuckGoSearchTool()._run(query)

    # Step 3: scrape a page. The URL is a placeholder; swap in one taken from
    # ddg_results if needed.
    target_url = "https://example.com"
    web_content = await WebsiteContentScraperTool()._run(target_url)

    # Step 4: assemble the report and hand it to the save tool.
    report = f"Wikipedia Results:\n{wikipedia_results}\n\nDuckDuckGo Search Results:\n{ddg_results}\n\nWeb Content:\n{web_content}"
    save_status = SaveToFileTool()._run(report)

    # Show what was gathered and whether saving succeeded.
    for label, value in (
        ("Wikipedia Results:", wikipedia_results),
        ("DuckDuckGo Results:", ddg_results),
        ("Save Response:", save_status),
    ):
        print(label, value)

# Execute the async function in Jupyter Notebook.
# A top-level `await` is used here, which the notebook cell accepts; it is not
# valid at module level in a plain Python script.
await run_async_agent()


  llm = ChatOpenAI(temperature=0.1)
  agent = initialize_agent(


Wikipedia Results: Page: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution capabilities on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2024.

Page: Supply chain attack
Summary: A supply chain attack is a cyber-attack that seeks to damage an organization by targetin

In [1]:

from typing import Any, Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import WikipediaAPIWrapper
from langchain.schema import SystemMessage
from langchain.document_loaders import WebBaseLoader
from langchain.tools import DuckDuckGoSearchResults
from langchain.tools import WikipediaQueryRun

# ============================================================
# Define the custom tools.


# Argument schema for DuckDuckGoSearchTool: one string field, `query`, with no default.
class DuckDuckGoSearchToolArgsSchema(BaseModel):
    query: str = Field(description="The query you will search for")


class DuckDuckGoSearchTool(BaseTool):
    """Tool that runs a query through ``DuckDuckGoSearchResults``."""

    name: str = "DuckDuckGoSearchTool"  # type hint added
    description: str = """
    Use this tool to perform web searches using the DuckDuckGo search engine.
    It takes a query as an argument.
    Example query: "Latest technology news"
    """
    args_schema: Type[DuckDuckGoSearchToolArgsSchema] = DuckDuckGoSearchToolArgsSchema

    def _run(self, query) -> Any:
        """Return the result of ``DuckDuckGoSearchResults().run(query)``."""
        return DuckDuckGoSearchResults().run(query)


# Argument schema for WikipediaSearchTool: one string field, `query`, with no default.
class WikipediaSearchToolArgsSchema(BaseModel):
    query: str = Field(description="The query you will search for on Wikipedia")


class WikipediaSearchTool(BaseTool):
    """Tool that runs a query through ``WikipediaQueryRun`` backed by ``WikipediaAPIWrapper``."""

    name: str = "WikipediaSearchTool"  # type hint added
    description: str = """
    Use this tool to perform searches on Wikipedia.
    It takes a query as an argument.
    Example query: "Artificial Intelligence"
    """
    args_schema: Type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema

    def _run(self, query) -> Any:
        """Return the result of running ``query`` through a new ``WikipediaQueryRun``."""
        return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()).run(query)


# Argument schema for WebScrapingTool: one string field, `url`, with no default.
class WebScrapingToolArgsSchema(BaseModel):
    url: str = Field(description="The URL of the website you want to scrape")


class WebScrapingTool(BaseTool):
    """Tool that loads a URL with ``WebBaseLoader`` and returns the loaded text."""

    name: str = "WebScrapingTool"  # type hint added
    description: str = """
    If you found the website link in DuckDuckGo,
    Use this to get the content of the link for my research.
    """
    args_schema: Type[WebScrapingToolArgsSchema] = WebScrapingToolArgsSchema

    def _run(self, url):
        """Load ``url`` and join the ``page_content`` of every loaded document with blank lines."""
        documents = WebBaseLoader([url]).load()
        page_texts = (document.page_content for document in documents)
        return "\n\n".join(page_texts)


# Argument schema for SaveToTXTTool: one string field, `text`, with no default.
class SaveToTXTToolArgsSchema(BaseModel):
    text: str = Field(description="The text you will save to a file.")


class SaveToTXTTool(BaseTool):
    """Tool that writes the given text to ``research_results.txt``, overwriting it."""

    name: str = "SaveToTXTTOOL"  # type hint added
    description: str = """
    Use this tool to save the content as a .txt file.
    """
    args_schema: Type[SaveToTXTToolArgsSchema] = SaveToTXTToolArgsSchema

    def _run(self, text) -> Any:
        """Print ``text``, write it to ``research_results.txt`` and return a status message.

        Args:
            text: The content to save.

        Returns:
            A fixed confirmation string naming the output file.
        """
        print(text)
        # Write as UTF-8 explicitly. Without `encoding`, `open` uses the
        # platform's locale encoding, which can raise UnicodeEncodeError or
        # garble non-ASCII research text on systems whose default is not UTF-8.
        with open("research_results.txt", "w", encoding="utf-8") as file:
            file.write(text)
        return "Research results saved to research_results.txt"


# ============================================================
# Create the agent.
# A system message is built here and passed to the agent.

# Chat model for the agent: gpt-4o-mini with temperature 0.1.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1)

# System prompt handed to the agent through `agent_kwargs` below. It tells the
# model to combine Wikipedia, DuckDuckGo and scraped pages, cite sources with
# URLs, and save the findings to a .txt file.
system_message = SystemMessage(
    content="""
        You are a research expert.

        Your task is to use Wikipedia or DuckDuckGo to gather comprehensive and accurate information about the query provided. 

        When you find a relevant website through DuckDuckGo, you must scrape the content from that website. Use this scraped content to thoroughly research and formulate a detailed answer to the question. 

        Combine information from Wikipedia, DuckDuckGo searches, and any relevant websites you find. Ensure that the final answer is well-organized and detailed, and include citations with links (URLs) for all sources used.

        Your research should be saved to a .txt file, and the content should match the detailed findings provided. Make sure to include all sources and relevant information.

        The information from Wikipedia must be included.

        Ensure that the final .txt file contains detailed information, all relevant sources, and citations.
        """
)

# OpenAI-functions agent wired to the four custom tools and the system prompt.
agent = initialize_agent(
    tools=[
        DuckDuckGoSearchTool(),
        WikipediaSearchTool(),
        WebScrapingTool(),
        SaveToTXTTool(),
    ],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    agent_kwargs={"system_message": system_message},
    verbose=True,
)


# ============================================================
# Check that the agent works as expected.

query = "Research about the XZ backdoor"
# `Chain.run` is deprecated in LangChain; `invoke` is its replacement.
# The executor reads the prompt from the "input" key and returns the final
# answer under "output", so `results` is still the answer string.
results = agent.invoke({"input": query})["output"]
print(results)

USER_AGENT environment variable not set, consider setting it to identify your requests.
  llm = ChatOpenAI(
  agent = initialize_agent(
  results = agent.run(query)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DuckDuckGoSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3msnippet: What Does the Backdoor Do? Malicious code added to xz Utils versions 5.6.0 and 5.6.1 modified the way the software functions. The backdoor manipulated sshd, the executable file used to make remote ..., title: The XZ Backdoor: Everything You Need to Know - WIRED, link: https://www.wired.com/story/xz-backdoor-everything-you-need-to-know/, snippet: Because the backdoor was discovered before the malicious versions of xz Utils were added to production versions of Linux, "it's not really affecting anyone in the real world," Will Dormann, a ..., title: Backdoor found in widely used Linux utility targets encrypted SSH ..., link: https://arstechnica.com/security/2024/03/backdoor-found-in-widely-used-linux-utility-breaks-encrypted-ssh-connections/, snippet: Now that the XZ backdoor has control over sshd, attackers can possess the encryption ke