In [7]:
import requests
import os
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from langchain.schema import SystemMessage
from typing import Type
from langchain.tools import BaseTool
from langchain.utilities.wikipedia import WikipediaAPIWrapper
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOpenAI

In [8]:
# Runs python-dotenv's loader before OPENAI_API_KEY is read from the
# environment below; presumably the key lives in a local .env file.
load_dotenv()
# Chat model handed to the agent further down in this cell.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0.1,
    # NOTE(review): streaming is disabled while a streaming stdout callback is
    # attached on the next line. Presumably the handler prints nothing in this
    # mode -- confirm whether streaming=True was intended.
    streaming=False,
    callbacks=[StreamingStdOutCallbackHandler()],
)

class DuckDuckGoSearchToolArgsSchema(BaseModel):
    # Argument schema for DuckDuckGoSearchTool: a single free-text query.
    # The Field description is the hint shown for this argument.
    query: str = Field(
        description=(
            "The query to search from duckduckgo search. "
            "Example query: Research about the XZ backdoor."
        ),
    )

class DuckDuckGoSearchTool(BaseTool):
    # Agent tool that queries the DuckDuckGo Instant Answer API and returns a
    # short textual answer, the literal string 'Empty' when the API has no
    # instant answer, or a bracketed error message when the request fails.
    name: str = "DuckDuckGoSearchTool"
    # Plain prose: the previous triple-quoted literal embedded stray quote
    # characters and leading indentation in the text sent to the model.
    description: str = (
        "A wrapper around Duck Duck Go Search. "
        "Search for information on the internet using DuckDuckGo."
    )
    args_schema: Type[
        DuckDuckGoSearchToolArgsSchema
    ] = DuckDuckGoSearchToolArgsSchema

    def _run(self, query: str) -> str:
        """Look up ``query`` via the DuckDuckGo Instant Answer endpoint.

        Args:
            query: Free-text search query.

        Returns:
            The first non-blank text found among the ``Abstract``,
            ``AbstractText``, ``Answer`` and ``Definition`` response fields;
            ``'Empty'`` when none of them carries text; or a
            ``"[DuckDuckGoSearchTool Error] ..."`` string when the request or
            JSON decoding fails.
        """
        try:
            resp = requests.get(
                url="https://api.duckduckgo.com/",
                params={"q": query, "format": "json"},
                headers={"User-Agent": "Mozilla/5.0"},
                timeout=10,
            )
            resp.raise_for_status()
            payload = resp.json()
            # Use .get() so a response without a given key reads as "no
            # answer" instead of surfacing a KeyError as a tool error.
            for key in ("Abstract", "AbstractText", "Answer", "Definition"):
                value = payload.get(key)
                # Only accept real text; some fields may be empty or non-string.
                if isinstance(value, str) and value.strip():
                    return value
            return 'Empty'
        except Exception as e:
            # Deliberately broad: the failure is handed back to the agent as
            # an observation rather than aborting the whole run.
            return f"[DuckDuckGoSearchTool Error] {type(e).__name__}: {e}"

class WikipediaSearchToolArgsSchema(BaseModel):
    # Argument schema for WikipediaSearchTool: a single free-text query.
    query: str = Field(
        description=(
            "The query to search from wikipedia. "
            "Example query: Research about the XZ backdoor."
        ),
    )

class WikipediaSearchTool(BaseTool):
    # Agent tool that forwards a query to LangChain's WikipediaAPIWrapper and
    # returns whatever text the wrapper produces.
    name: str = "WikipediaSearchTool"
    description: str = """
    Use this tool to find the wikipedia page for keyword.
    It takes a query as an argument.
    """
    args_schema: Type[
        WikipediaSearchToolArgsSchema
    ] = WikipediaSearchToolArgsSchema

    def _run(self, query: str) -> str:
        """Search Wikipedia for ``query``.

        Args:
            query: Free-text search query.

        Returns:
            The wrapper's result text, or a
            ``"[WikipediaSearchTool Error] ..."`` string when the lookup
            raises, so one failed lookup does not abort the whole agent run.
        """
        # Built outside the try block so that a problem constructing the
        # wrapper still fails loudly instead of becoming an observation.
        wiki = WikipediaAPIWrapper()
        try:
            return wiki.run(query)
        except Exception as e:
            # Same "report the error as an observation" convention as
            # DuckDuckGoSearchTool.
            return f"[WikipediaSearchTool Error] {type(e).__name__}: {e}"

class ScrapeArgs(BaseModel):
    # Argument schema for ScrapeTextTool: the URL of the page to scrape.
    url: str = Field(
        description="스크랩할 페이지 URL",
    )

class ScrapeTextTool(BaseTool):
    # Agent tool that downloads a page and returns the text of its first
    # non-empty <p> elements.
    name: str = "scrape_text"
    description: str = "주어진 URL의 <p> 태그 텍스트를 모두 합쳐서 반환합니다."
    args_schema: Type[ScrapeArgs] = ScrapeArgs

    def _run(self, url: str) -> str:
        """Fetch ``url`` and return the text of up to 10 non-empty paragraphs.

        Args:
            url: Address of the page to download.

        Returns:
            The paragraph texts joined by blank lines (an empty string when
            the page has no non-empty ``<p>`` element), or a
            ``"[ScrapeTextTool Error] ..."`` string when the HTTP request
            fails or the URL is malformed.
        """
        try:
            resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
            resp.raise_for_status()
        except requests.RequestException as e:
            # Report the failure to the agent instead of aborting the run,
            # matching DuckDuckGoSearchTool's error convention.
            return f"[ScrapeTextTool Error] {type(e).__name__}: {e}"

        # Parse the raw bytes so BeautifulSoup can detect the charset from the
        # document itself. Only trust the HTTP-level encoding when the
        # Content-Type header actually names one, because decoding through
        # resp.text can garble non-Latin pages when it does not.
        content_type = resp.headers.get("Content-Type", "")
        declared_encoding = resp.encoding if "charset" in content_type.lower() else None
        soup = BeautifulSoup(resp.content, "html.parser", from_encoding=declared_encoding)

        # Drop empty paragraphs first so they do not use up the 10-item cap.
        texts = (p.get_text(strip=True) for p in soup.find_all("p"))
        paragraphs = [text for text in texts if text]
        return "\n\n".join(paragraphs[:10])  # first 10 paragraphs only

class SaveToTextArgs(BaseModel):
    # Argument schema for SaveToTextTool: target file name plus the text to
    # write into it.
    filename: str = Field(
        description="저장할 파일명, 예: result.txt",
    )
    content: str = Field(
        description="파일에 쓸 텍스트 내용",
    )

class SaveToTextTool(BaseTool):
    # Agent tool that writes text to a UTF-8 file under the current working
    # directory and reports the absolute path that was written.
    name: str = "save_to_text"
    description: str = """
    주어진 content를 filename으로 저장합니다.
    반환값은 저장된 파일의 절대 경로입니다.
    """
    args_schema: Type[SaveToTextArgs] = SaveToTextArgs

    def _run(self, filename: str, content: str) -> str:
        """Write ``content`` to ``filename`` (UTF-8, overwriting any existing file).

        Args:
            filename: File name or relative path, resolved against the
                current working directory.
            content: Text to write.

        Returns:
            ``"Saved research results to <absolute path>"`` on success, or a
            ``"[SaveToTextTool Error] ..."`` string when the path points
            outside the working directory or the write fails.
        """
        base_dir = os.path.abspath(os.getcwd())
        # os.path.join discards base_dir when filename is absolute, so the
        # containment check below also rejects absolute paths elsewhere.
        target = os.path.abspath(os.path.join(base_dir, filename))

        # filename is chosen by the model, so treat it as untrusted and refuse
        # to write outside the working directory (e.g. "../x").
        # NOTE(review): symlinks are not resolved by this check.
        try:
            inside_base = os.path.commonpath([base_dir, target]) == base_dir
        except ValueError:
            # commonpath raises e.g. for paths on different Windows drives.
            inside_base = False
        if not inside_base:
            return (
                "[SaveToTextTool Error] Refusing to write outside the working "
                f"directory: {filename}"
            )

        try:
            with open(target, "w", encoding="utf-8") as f:
                f.write(content)
        except OSError as e:
            # Missing directory, permission problem, target is a directory...
            return f"[SaveToTextTool Error] {type(e).__name__}: {e}"
        return f"Saved research results to {target}"


# Build the research agent: an OpenAI-functions agent that can call the four
# tools defined above and is steered by the system message below.
agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    tools=[
        DuckDuckGoSearchTool(),
        WikipediaSearchTool(),
        ScrapeTextTool(),
        SaveToTextTool()
    ],
    agent_kwargs={
        # The prompt lists the research steps (Wikipedia first, then
        # DuckDuckGo with a scrape fallback, then save everything to a .txt
        # file) and shows an example save_to_text call.
        # NOTE(review): the example hard-codes "xz_backdoor_research.txt";
        # confirm this is intended for questions on other topics.
        "system_message": SystemMessage(
            content="""
            You are a research agent. Follow these steps:
            1) 먼저 Wikipedia에서 검색하고,
            2) DuckDuckGo에서 검색한 뒤 Instant Answer가 비어있으면
               검색 결과 페이지를 스크랩해서 내용을 가져오고,
            3) 마지막으로 모든 결과를 하나의 텍스트로 합쳐
               지정된 .txt 파일에 저장하세요.

            ```json
            {
              "name": "save_to_text",
              "arguments": {
                "filename": "xz_backdoor_research.txt",
                "content": "<지금까지 모은 모든 텍스트>"
              }
            }
            ```
        """
        )
    },
)

In [9]:
# Research prompt handed to the agent.
question = "Research about the XZ backdoor"
# Last expression of the cell: the value returned by invoke() is what the
# notebook displays (the recorded output is a dict with 'input' and 'output').
agent.invoke(question)



> Entering new AgentExecutor chain...

Invoking: `WikipediaSearchTool` with `{'query': 'XZ backdoor'}`


Page: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2

{'input': 'Research about the XZ backdoor',
 'output': 'I have completed the research on the XZ backdoor and saved the information to a text file. You can download the file [here](sandbox:/Users/cheon-yeongjo/IdeaProjects/LangChain-Challenge/xz_backdoor_research.txt).'}