# In [None]:
# fullstack gpt code challenge 08
import json
from dotenv import dotenv_values
from duckduckgo_search import DDGS
from langchain.agents import initialize_agent, AgentType
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.schema import SystemMessage
from langchain.tools import BaseTool, WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
from pydantic import BaseModel, Field
from typing import Type


# Settings (notably the OpenAI API key) are read from the local ".env" file.
config = dotenv_values(".env")

# Chat model that drives the agent. Streaming is enabled and a stdout
# callback handler is attached so generated output is printed as it arrives.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.1,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
    openai_api_key=config["OPENAI_API_KEY"],
)


# Argument schema for WikipediaSearchTool: one required free-text query.
class WikipediaSearchToolArgsSchema(BaseModel):
    # `...` marks the field as required (same as giving no default).
    query: str = Field(
        ...,
        description="The query you will search for information. ex) XZ backdoor",
    )

# Agent tool that forwards a query to Wikipedia through LangChain's
# WikipediaQueryRun / WikipediaAPIWrapper pair.
class WikipediaSearchTool(BaseTool):
    name = "WikipediaSearchTool"
    description = """
        Use this tool to search information on wikipedia site.
        It takes a query as an argument.
    """
    args_schema: Type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema

    def _run(self, query):
        """Run ``query`` against Wikipedia and return the runner's output."""
        # A fresh wrapper/runner pair is built on every call, as before.
        runner = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        return runner.run(query)


# Argument schema for DuckDuckGoSearchTool: one required free-text query.
class DuckDuckGoSearchToolArgsSchema(BaseModel):
    # `...` marks the field as required (same as giving no default).
    query: str = Field(
        ...,
        description="The query you will search for information. ex) XZ backdoor",
    )

# Agent tool that runs a DuckDuckGo text search and returns the hits as JSON.
class DuckDuckGoSearchTool(BaseTool):
    # Explicit annotations keep these overrides valid on pydantic-v2 based
    # LangChain releases as well as on the pydantic-v1 based ones.
    name: str = "DuckDuckGoSearchTool"
    description: str = """
        Use this tool to search information on duckduckgo site.
        It takes a query as an argument.
    """
    args_schema: Type[
        DuckDuckGoSearchToolArgsSchema
    ] = DuckDuckGoSearchToolArgsSchema

    def _run(self, query: str) -> str:
        """Search DuckDuckGo for ``query``.

        Args:
            query: Free-text search query.

        Returns:
            A JSON array (as a string) with the search hits, or a short error
            message when the search could not be performed.
        """
        # DuckDuckGoSearchAPIWrapper failed with an HTTP error, so the
        # duckduckgo_search.DDGS client is used directly instead.
        # CLI equivalent: ddgs text -k "Research about the XZ backdoor"
        try:
            hits = list(DDGS().text(query))
        except Exception as exc:
            # Tool boundary: the system prompt tells the agent to skip a step
            # on communication errors, which only works if the failure comes
            # back as an observation instead of aborting the whole agent run.
            return f"DuckDuckGo search failed: {exc}"
        # Keep non-ASCII characters readable rather than \uXXXX-escaped.
        return json.dumps(hits, ensure_ascii=False)


# Argument schema for SearchResultParseTool: the URL of a page to load.
class SearchResultParseToolArgsSchema(BaseModel):
    # `...` marks the field as required (same as giving no default).
    link: str = Field(
        ...,
        description="The site link retrieved from web search",
    )

# Agent tool that downloads a web page and returns its text content.
class SearchResultParseTool(BaseTool):
    # Explicit annotations keep these overrides valid on pydantic-v2 based
    # LangChain releases as well as on the pydantic-v1 based ones.
    name: str = "SearchResultParseTool"
    description: str = """
        Use this tool to load link to return detail content.
        It takes a link as an argument.
    """
    args_schema: Type[
        SearchResultParseToolArgsSchema
    ] = SearchResultParseToolArgsSchema

    def _run(self, link: str) -> str:
        """Load ``link`` and return the page text.

        Args:
            link: URL of the page to fetch.

        Returns:
            The text of the loaded document(s) joined by blank lines, or a
            short error message when the page could not be loaded.
        """
        try:
            documents = WebBaseLoader(link, verify_ssl=True).load()
        except Exception as exc:
            # Tool boundary: report the failure as an observation so the agent
            # can skip this link (as the system prompt asks) instead of the
            # exception aborting the whole run.
            return f"Failed to load {link}: {exc}"
        # Return plain text; a raw list of Document objects would reach the
        # model only as its repr(), with escaped newlines and metadata noise.
        return "\n\n".join(doc.page_content for doc in documents)


# Argument schema for SearchResultSaveTool: the text to write to disk.
class SearchResultSaveToolArgsSchema(BaseModel):
    # `...` marks the field as required (same as giving no default).
    content: str = Field(
        ...,
        description="The search result on wikipedia or duckduckgo site",
    )

# Agent tool that writes the research result to a fixed file on disk.
class SearchResultSaveTool(BaseTool):
    # Explicit annotations keep these overrides valid on pydantic-v2 based
    # LangChain releases as well as on the pydantic-v1 based ones.
    name: str = "SearchResultSaveTool"
    description: str = """
        Use this tool to save web search result.
        It takes a result as an argument.
    """
    args_schema: Type[
        SearchResultSaveToolArgsSchema
    ] = SearchResultSaveToolArgsSchema

    def _run(self, content: str) -> str:
        """Write ``content`` to ``./challenge-08.result``, replacing the file.

        Args:
            content: Text to store.

        Returns:
            A confirmation message on success, or a short error message when
            the file could not be written. Returning text (instead of None)
            lets the agent see whether the save actually happened.
        """
        file_path = "./challenge-08.result"
        try:
            # Write-only access is enough; the file is truncated and rewritten.
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)
        except OSError as exc:
            return f"Failed to save the result to {file_path}: {exc}"
        return f"Saved the result to {file_path}"


# Tools the agent may call: two search backends, a page loader and a file
# writer.
research_tools = [
    WikipediaSearchTool(),
    DuckDuckGoSearchTool(),
    SearchResultParseTool(),
    SearchResultSaveTool(),
]

# System prompt for the agent. The whitespace inside the literal is part of
# the prompt text and is kept exactly as it was.
system_message = SystemMessage(
    content="""
                You are a web research expert.

                You search information by query and save the result contents into file.
                Be sure to use two sites and summarize the results less than 1000 words.
                If communication error occurs, skip the task and go to next step, please.
            """
)

# OpenAI-functions agent wired to the shared chat model and the tools above.
agent = initialize_agent(
    tools=research_tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    agent_kwargs={"system_message": system_message},
    handle_parsing_errors=True,
    verbose=True,
)

# Run the research task and print what the agent returns.
query = "Research about the XZ backdoor"
result = agent.invoke(query)
print(result)