In [1]:
import os
from typing import Type

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from langchain.agents import Tool
from langchain.agents import initialize_agent, AgentType
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from pydantic import BaseModel, Field

In [3]:
### Search in Wikipedia & DuckDuckGo

# Argument schema for the Wikipedia search tool: one required string, `query`.
# (Documented with a comment rather than a docstring, since pydantic copies a
# model's docstring into the schema it generates.)
class WikipediaSearchArgs(BaseModel):
    query: str = Field(
        ...,  # required, no default
        description="Search term for Wikipedia",
    )

class WikipediaSearchTool(BaseTool):
    """Agent tool that looks a topic up on Wikipedia.

    The query is forwarded to ``WikipediaAPIWrapper.run`` and the value that
    call returns is handed back to the agent unchanged.
    """

    # Field overrides carry explicit type annotations: pydantic-v2-based
    # BaseTool releases reject a non-annotated override of an inherited field,
    # and older releases accept the annotated form as well.
    name: str = "wikipedia_search"
    description: str = "Use this tool to search for a topic summary on Wikipedia"
    args_schema: Type[WikipediaSearchArgs] = WikipediaSearchArgs

    def _run(self, query: str) -> str:
        """Search Wikipedia synchronously.

        Args:
            query: Search term passed to the Wikipedia wrapper.

        Returns:
            The result of ``WikipediaAPIWrapper().run(query)``.
        """
        wiki = WikipediaAPIWrapper()
        return wiki.run(query)

    async def _arun(self, query: str) -> str:
        """Asynchronous entry point; deliberately unsupported.

        Declared ``async`` so the override matches the coroutine signature of
        ``BaseTool._arun``; the error surfaces when the call is awaited.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Async not supported.")



# Argument schema for the DuckDuckGo search tool: one required string, `query`.
# (Documented with a comment rather than a docstring, since pydantic copies a
# model's docstring into the schema it generates.)
class DuckDuckGoSearchArgs(BaseModel):
    query: str = Field(
        ...,  # required, no default
        description="Find information on the web",
    )

class DuckDuckGoSearchTool(BaseTool):
    """Agent tool that runs a web search through DuckDuckGo.

    The query is forwarded to ``DuckDuckGoSearchAPIWrapper.run`` and the value
    that call returns is handed back to the agent unchanged.
    """

    # Field overrides carry explicit type annotations: pydantic-v2-based
    # BaseTool releases reject a non-annotated override of an inherited field,
    # and older releases accept the annotated form as well.
    name: str = "duckduckgo_search"
    description: str = "Use this tool to search the web for information and return a quick summary."
    args_schema: Type[DuckDuckGoSearchArgs] = DuckDuckGoSearchArgs

    def _run(self, query: str) -> str:
        """Search the web synchronously.

        Args:
            query: Search text passed to the DuckDuckGo wrapper.

        Returns:
            The result of ``DuckDuckGoSearchAPIWrapper().run(query)``.
        """
        ddg = DuckDuckGoSearchAPIWrapper()
        return ddg.run(query)


In [None]:
# Web Scraper
# Argument schema for the web scraper tool: one required string, `url`.
# (Documented with a comment rather than a docstring, since pydantic copies a
# model's docstring into the schema it generates.)
class ScraperArgs(BaseModel):
    url: str = Field(
        ...,  # required, no default
        description="URL to scrape",
    )

class WebScraperTool(BaseTool):
    """Agent tool that downloads a web page and returns text from its paragraphs."""

    # Field overrides carry explicit type annotations: pydantic-v2-based
    # BaseTool releases reject a non-annotated override of an inherited field,
    # and older releases accept the annotated form as well.
    name: str = "webscraper"
    description: str = "Use this tool to scrape text content from a given webpage."
    args_schema: Type[ScraperArgs] = ScraperArgs

    def _run(self, url: str) -> str:
        """Fetch ``url`` and return the text of its leading ``<p>`` elements.

        Args:
            url: Address of the page to download.

        Returns:
            The text of at most the first 10 ``<p>`` elements, joined with
            newlines and stripped of surrounding whitespace.

        Raises:
            requests.RequestException: If the request fails or times out
                (10 second timeout), or if the server answers with a 4xx/5xx
                status.
        """
        response = requests.get(url, timeout=10)
        # Fail on HTTP error statuses instead of scraping an error page and
        # returning it to the agent as if it were real content.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Limit the output to the first 10 paragraphs.
        paragraphs = soup.find_all("p")[:10]
        text = "\n".join(p.get_text() for p in paragraphs)
        return text.strip()

In [None]:
# Save to txt file
# Argument schema for the save-to-file tool: one required string, `content`.
# (Documented with a comment rather than a docstring, since pydantic copies a
# model's docstring into the schema it generates.)
class SaveFileArgs(BaseModel):
    content: str = Field(
        ...,  # required, no default
        description="Text content to save",
    )

class SaveToFileTool(BaseTool):
    """Agent tool that writes the given text to ``research.txt``."""

    # Field overrides carry explicit type annotations: pydantic-v2-based
    # BaseTool releases reject a non-annotated override of an inherited field,
    # and older releases accept the annotated form as well.
    name: str = "save_research_to_file"
    description: str = "Use this tool to save result to a txt file"
    args_schema: Type[SaveFileArgs] = SaveFileArgs

    def _run(self, content: str) -> str:
        """Write ``content`` to ``research.txt`` and report success.

        The path is relative, so the file lands in the current working
        directory. It is opened in ``"w"`` mode, so an existing file with
        that name is overwritten.

        Args:
            content: Text to store, written as UTF-8.

        Returns:
            A fixed confirmation message.
        """
        with open("research.txt", "w", encoding="utf-8") as f:
            f.write(content)
        return "Research saved to research.txt"

### Research AI Agent

In [5]:
# Read settings from the .env file one directory above the notebook, then
# pick the OpenAI key out of the process environment.
load_dotenv("../.env")
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Chat model that drives the agent.
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model_name="gpt-3.5-turbo",
    temperature=0,  # low temperature: favour factual answers
)

# OpenAI-functions agent wired to the four custom tools defined above.
agent = initialize_agent(
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    tools=[
        WikipediaSearchTool(),
        DuckDuckGoSearchTool(),
        WebScraperTool(),
        SaveToFileTool(),
    ],
    verbose=True,
)

### Run the Agent

In [None]:
# Research request for the agent. It is passed under the executor's "input"
# key, the same key the recorded result echoes back. The call stays the last
# expression so the result dict is displayed as the cell output.
prompt = "Research about the XZ backdoor and save the findings to a text file."
agent.invoke({"input": prompt})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia_search` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2024

{'input': 'Research about the XZ backdoor',
 'output': 'The XZ backdoor refers to a malicious backdoor that was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1. The backdoor was created by an account using the name "Jia Tan" and allows an attacker with a specific Ed448 private key to execute remote code through OpenSSH on the affected Linux system. This issue has been assigned the Common Vulnerabilities and Exposures number CVE-2024-3094 and has a CVSS score of 10.0, the highest possible score.\n\nThe xz utility is commonly present in most Linux distributions, but at the time of discovery, the backdoored version had not been widely deployed to production systems. The backdoor was discovered by software developer Andres Freund, who announced his findings on March 29, 2024.\n\nA backdoor is a covert method of bypassing normal authentication or encryption in a computer system, product, or device. It is often used for securing remote a