In [1]:
# KR)
# 새로운 Jupyter notebook에서 리서치 AI 에이전트를 만들고 커스텀 도구를 부여합니다.
# 에이전트는 다음 작업을 수행할 수 있어야 합니다:
# Wikipedia에서 검색
# DuckDuckGo에서 검색  => 차단되어 제외함.
# 웹사이트의 텍스트를 스크랩하고 추출합니다.
# 리서치 결과를 .txt 파일에 저장하기
# 다음 쿼리로 에이전트를 실행합니다: "Research about the XZ backdoor" 라는 쿼리로 에이전트를 실행하면, 에이전트는 Wikipedia 또는 DuckDuckGo에서 검색을 시도하고, DuckDuckGo에서 웹사이트를 찾으면 해당 웹사이트에 들어가서 콘텐츠를 추출한 다음 .txt 파일에 조사 내용을 저장하는 것으로 완료해야 합니다.
import os
import requests
from typing import Type
from bs4 import BeautifulSoup
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import WikipediaAPIWrapper

# 1. LLM configuration: chat model with a low sampling temperature.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo-1106",
    temperature=0.1,
)

# --- Custom tool 1: Wikipedia search ---
# Argument schema for WikipediaSearchTool: one required string, the keyword
# to look up.
class WikipediaSearchArgsSchema(BaseModel):
    query: str = Field(
        ...,
        description="Wikipedia에서 검색할 키워드",
    )

class WikipediaSearchTool(BaseTool):
    """Agent tool that looks up a keyword on Wikipedia.

    The search itself is delegated to ``WikipediaAPIWrapper.run``; this class
    only adapts it to the tool interface.
    """

    # Annotated overrides: pydantic v2 rejects a base-class field that is
    # overridden without an annotation, while annotated overrides are accepted
    # by both pydantic v1 and v2.
    name: str = "WikipediaSearch"
    description: str = "Wikipedia에서 전문적이고 신뢰할 수 있는 정보를 검색합니다."
    args_schema: Type[WikipediaSearchArgsSchema] = WikipediaSearchArgsSchema

    def _run(self, query: str) -> str:
        """Search Wikipedia for *query*.

        Returns:
            The text produced by ``WikipediaAPIWrapper.run``, or an error
            message string if the search call raises.
        """
        # Built outside the ``try`` so that a broken installation (e.g. a
        # missing dependency) still surfaces as an exception instead of being
        # reported to the agent as a search result.
        wiki = WikipediaAPIWrapper()
        try:
            return wiki.run(query)
        except Exception as e:
            # Same convention as the sibling tools: report the failure to the
            # agent as an observation rather than aborting the whole run.
            return f"Wikipedia 검색 중 오류 발생: {e}"

# --- Custom tool 2: website scraper ---
# Argument schema for WebScraperTool: one required string, the URL of the
# page whose text should be extracted.
class WebScraperArgsSchema(BaseModel):
    url: str = Field(
        ...,
        description="텍스트를 추출할 웹사이트의 URL",
    )

class WebScraperTool(BaseTool):
    """Agent tool that downloads a web page and returns its visible text.

    The page is fetched with ``requests``, parsed with BeautifulSoup, stripped
    of script/style/header/footer/nav elements, and reduced to its non-empty
    text lines. The result is truncated to ``max_chars`` characters.
    """

    # Annotated overrides: pydantic v2 rejects a base-class field that is
    # overridden without an annotation, while annotated overrides are accepted
    # by both pydantic v1 and v2.
    name: str = "WebScraper"
    description: str = "특정 웹사이트 URL에 직접 접속하여 본문 텍스트 내용을 추출합니다."
    args_schema: Type[WebScraperArgsSchema] = WebScraperArgsSchema
    # Maximum number of characters of extracted text handed back to the agent.
    max_chars: int = 2000

    def _run(self, url: str) -> str:
        """Fetch *url* and return its cleaned text.

        Returns:
            Up to ``max_chars`` characters of newline-joined page text, or an
            error message string if fetching or parsing fails (including HTTP
            4xx/5xx responses).
        """
        try:
            # Browser-like User-Agent to avoid basic bot blocking.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)
            # Treat HTTP error statuses as failures instead of scraping the
            # error page and presenting it to the agent as real content.
            response.raise_for_status()

            # When the server does not declare a charset, requests falls back
            # to ISO-8859-1 for text responses, which garbles UTF-8 pages.
            # Use the encoding detected from the body in that case.
            content_type = response.headers.get("Content-Type", "")
            if "charset" not in content_type.lower():
                response.encoding = response.apparent_encoding

            soup = BeautifulSoup(response.text, "html.parser")

            # Drop elements that carry no article text.
            for tag in soup(["script", "style", "header", "footer", "nav"]):
                tag.decompose()

            text = soup.get_text(separator="\n")
            # Strip each line once and keep only the non-empty ones.
            lines = [s for s in map(str.strip, text.splitlines()) if s]
            return "\n".join(lines)[: self.max_chars]
        except Exception as e:
            # Tool boundary: report the failure to the agent as an observation
            # rather than aborting the whole run.
            return f"스크래핑 중 오류 발생: {e}"

# --- Custom tool 3: file saver ---
# Argument schema for FileSaverTool: the required text to write and an
# optional target file name that defaults to "research_results.txt".
class FileSaverArgsSchema(BaseModel):
    content: str = Field(
        ...,
        description="파일에 저장할 리서치 내용 전체",
    )
    filename: str = Field(
        default="research_results.txt",
        description="저장할 파일 이름",
    )

class FileSaverTool(BaseTool):
    """Agent tool that writes the research text to a file.

    The file is opened in write mode with UTF-8 encoding, so an existing file
    with the same name is overwritten.
    """

    # Annotated overrides: pydantic v2 rejects a base-class field that is
    # overridden without an annotation, while annotated overrides are accepted
    # by both pydantic v1 and v2.
    name: str = "FileSaver"
    description: str = "조사된 최종 결과물을 .txt 파일로 저장합니다."
    args_schema: Type[FileSaverArgsSchema] = FileSaverArgsSchema

    def _run(self, content: str, filename: str = "research_results.txt") -> str:
        """Write *content* to *filename*.

        Returns:
            A success message naming the file, or an error message string if
            opening or writing the file fails.
        """
        # NOTE(review): `filename` is chosen by the agent and is opened as-is,
        # so it may point outside the working directory. Consider restricting
        # it (e.g. to a base name) if the agent processes untrusted content.
        try:
            with open(filename, "w", encoding="utf-8") as f:
                f.write(content)
        except Exception as e:
            # Tool boundary: report the failure to the agent as an observation
            # rather than aborting the whole run.
            return f"파일 저장 중 오류 발생: {e}"
        return f"성공적으로 {filename}에 저장되었습니다."

# --- Agent initialization (DuckDuckGo tool intentionally left out) ---
# An OpenAI-functions agent wired to the three custom tools defined above.
agent = initialize_agent(
    tools=[
        WikipediaSearchTool(),
        WebScraperTool(),
        FileSaverTool(),
    ],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    verbose=True,
)

# --- Run the agent (instructed to research primarily through Wikipedia) ---
# The instruction is assembled from one sentence per step, joined by single
# spaces into one prompt string.
prompt = " ".join(
    (
        "Research about the 'XZ backdoor' using Wikipedia.",
        "1. Find technical details and a summary from Wikipedia.",
        "2. If Wikipedia provides external links or specific URLs, use the WebScraper to get more details.",
        "3. Compile the findings and save the final report into 'research_results.txt' using the FileSaver.",
    )
)

agent.invoke(prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearch` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2024.

{'input': "Research about the 'XZ backdoor' using Wikipedia. 1. Find technical details and a summary from Wikipedia. 2. If Wikipedia provides external links or specific URLs, use the WebScraper to get more details. 3. Compile the findings and save the final report into 'research_results.txt' using the FileSaver.",
 'output': 'I have compiled the research findings into a file named "research_results.txt." Here is the content of the file:\n\n```\nXZ Utils Backdoor\n\nSummary:\nIn February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score. While xz is commonly present in m