In [None]:
from typing import Any, Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import WikipediaAPIWrapper
from datetime import datetime
from bs4 import BeautifulSoup
import requests
from langchain.utilities import DuckDuckGoSearchAPIWrapper

# Chat model shared by the agent below; streaming is switched on.
llm = ChatOpenAI(
    model_name="gpt-4-turbo-preview",
    temperature=1,
    streaming=True,
)

# Argument schema for WikipediaSearchTool: one required string field.
class WikipediaSearchInput(BaseModel):
    # Free-text search string that the tool passes on to Wikipedia.
    query: str = Field(description="Search query for Wikipedia")


# Agent tool that answers a query through LangChain's WikipediaAPIWrapper.
class WikipediaSearchTool(BaseTool):
    name = "WikipediaSearch"
    description = """
    Use this tool to search for information on Wikipedia.
    Input should be a search query string.
    Returns detailed information from Wikipedia articles.
    """
    args_schema: Type[BaseModel] = WikipediaSearchInput

    def _run(self, query: str) -> str:
        """Search Wikipedia for ``query`` and return the wrapper's result.

        A fresh ``WikipediaAPIWrapper`` is created for every call. Any
        exception raised by the search itself is turned into an error
        string so the agent gets a readable message back.
        """
        client = WikipediaAPIWrapper()
        try:
            answer = client.run(query)
        except Exception as err:
            return f"Wikipedia search error: {err!s}"
        else:
            return answer


# Argument schema for DuckDuckGoSearchTool: one required string field.
class DuckDuckGoSearchInput(BaseModel):
    # Free-text search string that the tool passes on to DuckDuckGo.
    query: str = Field(description="Search query for DuckDuckGo")


# Agent tool that runs a web search through LangChain's
# DuckDuckGoSearchAPIWrapper.
class DuckDuckGoSearchTool(BaseTool):
    name = "DuckDuckGoSearch"
    description = """
    SECONDARY SEARCH TOOL. Use this if Wikipedia doesn't have enough information or for recent news.
    This searches the broader internet for additional details and current events.
    Input should be a search query string.
    Returns search results with titles, descriptions, and URLs.
    """
    args_schema: Type[BaseModel] = DuckDuckGoSearchInput

    def _run(self, query: str) -> str:
        """Search DuckDuckGo for ``query`` and return the wrapper's result.

        Args:
            query: Free-text search string.

        Returns:
            The text produced by ``DuckDuckGoSearchAPIWrapper.run``, or a
            ``"DuckDuckGo search error: ..."`` message if the search raises.
        """
        # NOTE(review): the tool description promises titles and URLs;
        # confirm that ``run`` (rather than ``results``) really returns them.
        ddg = DuckDuckGoSearchAPIWrapper()
        try:
            return ddg.run(query)
        except Exception as e:
            # Same contract as the other tools: report the failure to the
            # agent as text instead of raising out of the tool call.
            return f"DuckDuckGo search error: {str(e)}"

# Argument schema for WebScraperTool: one required string field.
class WebScraperInput(BaseModel):
    # Address of the page whose text should be fetched.
    url: str = Field(description="URL of the website to scrape")


# Agent tool that downloads a page and reduces it to plain text.
class WebScraperTool(BaseTool):
    name = "WebScraper"
    description = """
    Use this tool to scrape and extract text content from a website.
    Input should be a complete URL (starting with http:// or https://).
    Returns the main text content from the webpage.
    """
    args_schema: Type[BaseModel] = WebScraperInput

    def _run(self, url: str) -> str:
        """Fetch ``url`` and return its visible text, capped at 10000 chars.

        Script, style and page-chrome elements are removed before the text
        is extracted, and blank lines are dropped. Any failure (network,
        HTTP status, parsing) is returned as an error string.
        """
        try:
            # Send a browser-like User-Agent with the request.
            page = requests.get(
                url,
                headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                },
                timeout=15,
            )
            page.raise_for_status()

            document = BeautifulSoup(page.content, 'html.parser')

            # Drop elements that carry no article text.
            unwanted = ['script', 'style', 'nav', 'footer', 'header', 'aside', 'iframe']
            for element in document(unwanted):
                element.decompose()

            raw_text = document.get_text(separator='\n', strip=True)

            # Normalise whitespace line by line and discard empty lines.
            stripped = (piece.strip() for piece in raw_text.split('\n'))
            body = '\n'.join(piece for piece in stripped if piece)

            # Keep the tool output short enough to hand back to the model.
            if len(body) > 10000:
                body = body[:10000] + "\n\n...(content truncated for length)"

            return f"Content from {url}:\n\n{body}"
        except Exception as err:
            return f"Web scraping error for {url}: {err!s}"


# Argument schema for FileSaverTool: two required string fields.
class FileSaverInput(BaseModel):
    # Text that will be written to the output file.
    content: str = Field(description="Content to save to file")
    # Base name of the output file; the tool appends a timestamp and ".txt".
    filename: str = Field(description="Name of the file (without .txt extension)")


# Agent tool that writes research text to a timestamped .txt file in the
# current working directory.
class FileSaverTool(BaseTool):
    name = "FileSaver"
    description = """
    Use this tool to save research results to a .txt file.
    Input should be two parameters: content (the text to save) and filename (without .txt extension).
    Returns confirmation message with the saved filename.
    """
    args_schema: Type[BaseModel] = FileSaverInput

    def _run(self, content: str, filename: str) -> str:
        """Write ``content`` to ``<filename>_<YYYYmmdd_HHMMSS>.txt``.

        The file name comes from the model, so it is sanitised first:
        directory components are discarded, a trailing ``.txt`` is removed
        (the tool appends its own), and an empty name falls back to
        ``"research"``.

        Args:
            content: Text to write, encoded as UTF-8.
            filename: Base name for the file, ideally without extension.

        Returns:
            A confirmation message containing the final file name, or a
            ``"File saving error: ..."`` message if anything fails.
        """
        import os  # local import: only needed for the name sanitising below

        try:
            # Keep only the last path component so a name such as
            # "../../somewhere/report" cannot escape the working directory.
            # Backslashes are normalised so Windows-style paths are covered.
            base_name = os.path.basename(filename.strip().replace("\\", "/"))

            # Models often include the extension despite the instructions;
            # strip it so the result is not "report.txt_<timestamp>.txt".
            if base_name.lower().endswith(".txt"):
                base_name = base_name[:-4]

            # Avoid producing a file that is named only by its timestamp.
            if not base_name:
                base_name = "research"

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            full_filename = f"{base_name}_{timestamp}.txt"

            with open(full_filename, 'w', encoding='utf-8') as f:
                f.write(content)

            return f"Successfully saved research to {full_filename}"
        except Exception as e:
            return f"File saving error: {str(e)}"


# Tools handed to the agent, in the order they are registered.
tools = [
    WikipediaSearchTool(),
    # Disabled because of a version issue:
    # DuckDuckGoSearchTool(),
    WebScraperTool(),
    FileSaverTool(),
]

# Build the OpenAI-functions agent from the chat model and the tool list.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    max_iterations=10,
    handle_parsing_errors=True,
    verbose=True,  # print the intermediate tool calls while running
)


# Task prompt: research the XZ backdoor on Wikipedia, write a report, and
# save it with the FileSaver tool.
query = """Research about the XZ backdoor and save it to a file.

IMPORTANT: You MUST use the FileSaver tool at the end to save your research!

Steps:
1. Search Wikipedia for "XZ Utils backdoor"
2. Create a comprehensive report with all findings
3. **MUST USE FileSaver tool to save the complete report**

Do not finish until you have used the FileSaver tool!
"""

# Run the agent on the prompt; the final text is read from the "output" key.
result = agent.invoke({"input": query})

# Header and answer go on separate lines, preceded by a blank line.
print("\nFinal Answer:", result["output"], sep="\n")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearch` with `{'query': 'XZ Utils backdoor'}`


[0m[36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March