In [1]:

import dotenv
from bs4 import BeautifulSoup
from langchain.agents import initialize_agent, AgentType
from langchain.document_loaders import WikipediaLoader
from langchain.schema import SystemMessage
from langchain.tools import BaseTool
from pydantic import Field
from pydantic.main import BaseModel

# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from - confirm.
dotenv.load_dotenv()

from duckduckgo_search import DDGS
# Research topic that is passed to the agent in the final cell.
assignment_query = "Research about the XZ backdoor"

True

In [3]:
from langchain.tools import BaseTool
# Argument schema for the Wikipedia search tool: a single free-text query.
class WikipediaSearchToolArgsSchema(BaseModel):
    # The help text must be passed as `description=`. The first positional
    # argument of Field() is the field's *default value*, which would make
    # `query` optional and default it to the help text itself.
    query: str = Field(description="The query for information")

class WikipediaSearchTool(BaseTool):
    """Agent tool that looks a query up on Wikipedia and returns the article text.

    Attributes:
        load_max_docs: Upper bound on the number of Wikipedia pages fetched per
            query. Kept small so the observation handed back to the LLM stays
            short and the lookup stays fast.
    """
    name:str = "WikipediaSearchTool"
    description:str = """
    Search Wikipedia, get 3 important keywords from it, and returns as python array of keywords
    """
    # Builtin `type[...]` is used because `typing.Type` is not imported until a
    # later cell; this keeps the cell runnable on a fresh kernel.
    args_schema: type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema
    load_max_docs: int = 3

    def _run(self, query, **kwargs):
        """Fetch Wikipedia pages matching ``query`` and return their text.

        Args:
            query: Free-text search string.

        Returns:
            A single string with one "title + content" section per page found,
            or a short message when no page matches.
        """
        # WikipediaLoader is lazy: nothing is fetched until .load() is called.
        # Returning the loader itself would hand the agent an object repr.
        documents = WikipediaLoader(
            query=query,
            load_max_docs=self.load_max_docs,
        ).load()
        if not documents:
            return f"No Wikipedia article found for: {query}"
        return "\n\n".join(
            f"{doc.metadata.get('title', '')}\n{doc.page_content}"
            for doc in documents
        )

In [4]:
from typing import Type, TypedDict


# Argument schema used by DuckDuckGoSearchTool: a list of search query strings.
class DuckDuckGoToolArgsSchema(BaseModel):
    queries: list[str] = Field(description="The queries search urls")

class DuckDuckGoSearchTool(BaseTool):
    """Agent tool that runs each query through DuckDuckGo text search.

    Attributes:
        region: DuckDuckGo region code in "<country>-<language>" form
            (e.g. "kr-kr", "us-en", "wt-wt" for no region). A locale tag such
            as "ko-KR" is not a valid region code.
        max_results: Maximum number of hits requested per query.
    """
    name:str = "DuckDuckGoSearchTool"
    description:str = """
    This tool send queries to Duck Duck Go Search engine to get link associated with the query.
    """
    args_schema:Type[DuckDuckGoToolArgsSchema] = DuckDuckGoToolArgsSchema
    region: str = "kr-kr"
    max_results: int = 2

    def _run(self, queries: list[str], **kwargs):
        """Search DuckDuckGo once per query.

        Args:
            queries: Search strings; an empty list yields an empty result.

        Returns:
            A list with one entry per query, each entry being the list of hits
            returned by ``DDGS.text`` for that query.
        """
        ddgs = DDGS()
        # list() materialises the hits so the agent never receives a lazy
        # generator (some duckduckgo_search releases return one).
        return [
            list(ddgs.text(query, region=self.region, max_results=self.max_results))
            for query in queries
        ]

In [5]:
from typing_extensions import TypedDict
import requests
# One link to scrape, as consumed by ScrapingWebsitesTool._run.
class Info(TypedDict):
    title: str  # label copied unchanged into the scraping result
    href: str  # URL that is fetched with requests.get
# Argument schema used by ScrapingWebsitesTool: a list of Info dicts (title + href).
class ScrapingWebsitesToolArgsSchema(BaseModel):
    infos: list[Info] = Field(description="Arrays of information that has title and href.")
class ScrapingWebsitesTool(BaseTool):
    """Agent tool that downloads each given URL and extracts its visible text.

    Attributes:
        request_timeout: Seconds to wait for each HTTP request before giving
            up, so a single unresponsive host cannot hang the agent.
    """
    name: str = "ScrapingWebsitesTool"
    description:str = "DuckDuckGoSearchTool will give href to this tool and then this tools will scape the website. "
    args_schema:Type[ScrapingWebsitesToolArgsSchema] = ScrapingWebsitesToolArgsSchema
    request_timeout: float = 10.0

    def _run(self, infos:list[Info], **kwargs):
        """Fetch every ``href`` in ``infos`` and return the page text.

        Args:
            infos: Dicts carrying a ``title`` and an ``href`` to download.

        Returns:
            A list of dicts with keys ``title``, ``href`` and ``text``, in the
            same order as ``infos``. When a page cannot be fetched, ``text``
            holds a short failure message instead of page content, so one bad
            URL does not discard the other results.
        """
        results = []
        for info in infos:
            try:
                # TLS certificate verification is left at its default (on);
                # disabling it would expose the scrape to tampered content.
                res = requests.get(info['href'], timeout=self.request_timeout)
                res.raise_for_status()
            except requests.RequestException as exc:
                text = f"Failed to fetch page: {exc}"
            else:
                soup = BeautifulSoup(res.content, "html.parser")
                # An explicit separator keeps adjacent text nodes from being
                # glued together ("HelloWorld"); strip=True already drops
                # whitespace-only nodes, so no blank lines are produced.
                text = soup.get_text(separator="\n", strip=True)
            results.append({
                'title': info['title'],
                'href': info['href'],
                'text': text,
            })
        return results

In [6]:
# One record written by SavingToFileTool._run; each key becomes a CSV column.
class SavingInfo(TypedDict):
    title: str
    href: str
    text: str
# Describes the `infos` argument: a list of SavingInfo dicts (title, href, text).
class SavingToFileToolArgsSchema(BaseModel):
    infos: list[SavingInfo] = Field(description="Arrays of dictionary that has title, href, and text as key.")
class SavingToFileTool(BaseTool):
    """Agent tool that writes the collected records to ``./information.txt`` as CSV."""
    name:str = "SavingToFileTool"
    description:str = "This tool will save all information from Scarping tool information"
    # Attach the declared schema, matching the other tools in this notebook,
    # so the LLM is shown the argument description.
    args_schema:Type[SavingToFileToolArgsSchema] = SavingToFileToolArgsSchema

    def _run(self, infos:list[SavingInfo], **kwargs):
        """Write ``infos`` to ``./information.txt``, overwriting any existing file.

        Args:
            infos: Records with ``title``, ``href`` and ``text`` keys; a missing
                key is written as ``"N/A"``.

        Returns:
            A confirmation message with the record count and file path, so the
            agent gets a meaningful observation instead of ``None``.
        """
        import csv

        path = "./information.txt"
        # newline="" lets the csv module control line endings itself.
        with open(path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["title", "href", "text"])
            writer.writerows(
                [info.get("title", "N/A"), info.get("href", "N/A"), info.get("text", "N/A")]
                for info in infos
            )
        return f"Saved {len(infos)} record(s) to {path}"


In [7]:
from langchain.chat_models import ChatOpenAI
from langchain.agents import AgentType
# Chat model that drives the agent's tool selection.
llm = ChatOpenAI(
    model="gpt-4o-mini",
)
# OpenAI-functions agent wired to the four custom tools defined above.
agent = initialize_agent(
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    tools=[
        WikipediaSearchTool(),
        DuckDuckGoSearchTool(),
        ScrapingWebsitesTool(),
        SavingToFileTool(),
    ],
    # verbose=True,  # uncomment to trace every tool call while debugging
    handle_parsing_errors=True,
    agent_kwargs={
        # Tool names in the prompt must match each tool's `name` attribute
        # exactly; otherwise the model is told to use tools that do not exist.
        "system_message": SystemMessage(content="""
        You are a good research AI for collection information.
        You will collect information from Wikipedia. Summary it and make array for searching web href from DuckDuckGo.
        Next, you will scrape information from those urls above hrefs using ScrapingWebsitesTool.
        And ScrapingWebsitesTool's result will be send to SavingToFileTool for saving file information.
        Beware that before send information from ScrapingWebsitesTool to SavingToFileTool you must summary the text that each array of dictionary has. Good luck.
        """)
    }
)
# Run the full research workflow; the bare `result` renders the agent's answer.
result = agent.invoke(assignment_query)
result

  llm = ChatOpenAI(
  agent = initialize_agent(


{'input': 'Research about the XZ backdoor',
 'output': 'I have gathered information about the XZ backdoor, a significant cybersecurity incident involving the XZ Utils software. Here’s a summary of the findings:\n\n### Summary of the XZ Backdoor Incident\n1. **Discovery**: In February 2024, a malicious backdoor was embedded in the Linux build of the XZ utility, specifically within versions 5.6.0 and 5.6.1 of the liblzma library. It was introduced by an individual or group using the name "Jia Tan."\n   \n2. **Functionality**: The backdoor allowed attackers possessing a specific Ed448 private key to execute remote code on affected Linux systems, potentially compromising the entire system’s security.\n\n3. **Detection**: The backdoor was discovered by software developer Andres Freund while he was investigating performance issues related to SSH connections on Debian systems. The public disclosure occurred on March 29, 2024.\n\n4. **Impact**: The sophistication of the backdoor was highlighte