In [None]:
from langchain.tools import DuckDuckGoSearchResults, WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
from langchain.document_loaders import WebBaseLoader
from pathlib import Path
from openai import OpenAI
import json
import os

def search_by_duckduckgo(inputs):
    """Run a DuckDuckGo search for ``inputs["query"]`` and return the tool's result.

    Args:
        inputs: Mapping with a ``"query"`` entry holding the search terms.

    Returns:
        Whatever ``DuckDuckGoSearchResults.run`` returns for the query.
    """
    search_terms = inputs["query"]
    return DuckDuckGoSearchResults().run(search_terms)

def search_by_wikipedia(inputs):
    """Look up ``inputs["query"]`` on Wikipedia and return the tool's result.

    Args:
        inputs: Mapping with a ``"query"`` entry holding the topic to look up.

    Returns:
        Whatever ``WikipediaQueryRun.run`` returns for the query.
    """
    topic = inputs["query"]
    wrapper = WikipediaAPIWrapper()
    return WikipediaQueryRun(api_wrapper=wrapper).run(topic)

def scrape_web_page(inputs):
    """Load the page at ``inputs["url"]`` and return its text content.

    Args:
        inputs: Mapping with a ``"url"`` entry holding the page address.

    Returns:
        The ``page_content`` of every loaded document, separated by blank lines.
    """
    target = inputs["url"]
    pages = WebBaseLoader([target]).load()
    return "\n\n".join(page.page_content for page in pages)

def save_to_file(inputs):
    """Write ``inputs["text"]`` to ``./output/output.txt``, replacing any previous file.

    The output directory is created when missing. The file is always written as
    UTF-8 so that non-ASCII text (common in scraped pages) is stored the same
    way on every platform instead of depending on the locale's default encoding.

    Args:
        inputs: Mapping with a ``"text"`` entry holding the content to save.

    Returns:
        A confirmation message naming the file that was written.
    """
    output_dir = "./output"
    output_path = f"{output_dir}/output.txt"
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    text = inputs["text"]
    # Explicit encoding: the default is locale-dependent and may reject some characters.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(text)
    return f"Text saved to {output_path}"

# Dispatch table: tool name requested by the assistant -> local Python callable.
# Keys are the functions' own names, which match the names declared in the tool schemas.
functions_map = {
    tool.__name__: tool
    for tool in (
        search_by_duckduckgo,
        search_by_wikipedia,
        scrape_web_page,
        save_to_file,
    )
}

def _single_string_tool(name, description, param, param_description):
    """Build a function-tool schema that takes exactly one required string argument."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {
                    param: {
                        "type": "string",
                        "description": param_description,
                    },
                },
                "required": [param],
            },
        },
    }


# Tool schemas handed to the assistant; each name has a matching entry in functions_map.
functions = [
    _single_string_tool(
        "search_by_duckduckgo",
        "Given a query, returns a URL of relevant websites from DuckDuckGo.",
        "query",
        "Query to search for",
    ),
    _single_string_tool(
        "search_by_wikipedia",
        "Given a query, returns the summary of the query from Wikipedia.",
        "query",
        "Query to search for",
    ),
    _single_string_tool(
        "scrape_web_page",
        "Given a URL, returns the content of the web page.",
        "url",
        "URL of the web page",
    ),
    _single_string_tool(
        "save_to_file",
        "Given a text content, saves it to a file.",
        "text",
        "Text content to save",
    ),
]

# System prompt for the research assistant: it tells the model to search Wikipedia and
# DuckDuckGo, scrape the sites it finds, cite sources with URLs, and save the findings
# to a .txt file. Referenced by the (commented-out) assistant creation call.
instructions = """
    You are a research expert.
    Your task is to use Wikipedia or DuckDuckGo to gather comprehensive and accurate information about the query provided. 
    When you find a relevant website through DuckDuckGo, you must scrape the content from that website. Use this scraped content to thoroughly research and formulate a detailed answer to the question. 
    Combine information from Wikipedia, DuckDuckGo searches, and any relevant websites you find. Ensure that the final answer is well-organized and detailed, and include citations with links (URLs) for all sources used.
    Your research should be saved to a .txt file, and the content should match the detailed findings provided. Make sure to include all sources and relevant information.
    The information from Wikipedia must be included.
    Ensure that the final .txt file contains detailed information, all relevant sources, and citations.
"""

# OpenAI API client.
client = OpenAI()

# One-time provisioning of the assistant, kept for reference. Uncomment to create it:
# assistant = client.beta.assistants.create(
#     name="Research Expert",
#     instructions=instructions,
#     model="gpt-4o-mini",
#     tools=functions,
# )

# ID of the assistant to run, read from the environment (None when the variable is unset).
assistant_id = os.getenv("OPENAI_ASSISTANT_ID")
print(assistant_id)

In [39]:
from typing_extensions import override
from openai import AssistantEventHandler

class EventHandler(AssistantEventHandler):
    """Streaming event handler that prints assistant text and services tool calls.

    Text deltas are echoed to stdout and accumulated in ``self.answer``. When the
    stream ends with the run in status ``requires_action``, every requested
    function is looked up in the module-level ``functions_map``, executed, and
    its output is submitted back through a new stream driven by a fresh
    ``EventHandler``. Text produced by that follow-up stream is appended to this
    handler's ``answer`` so the outermost handler ends up with the whole reply.

    Args:
        client: OpenAI client used to submit tool outputs.
        chat_callback: Optional callable invoked with the accumulated answer
            text each time a text message finishes.
    """

    def __init__(self, client, chat_callback = None):
        super().__init__()
        self.client = client
        self.answer = ""
        self.chat_callback = chat_callback

    @override
    def on_text_created(self, text) -> None:
        """Print the prompt prefix when the assistant starts a text message."""
        print("\nassistant > ", end="", flush=True)

    @override
    def on_text_delta(self, delta, snapshot):
        """Append the streamed fragment to ``self.answer`` and echo it."""
        self.answer += delta.value
        print(delta.value, end="", flush=True)

    @override
    def on_text_done(self, text) -> None:
        """Report the text accumulated so far to ``chat_callback`` (if any)."""
        print(f"on_text_done > chat_callback: {self.answer}")
        if self.chat_callback:
            print("on_text_done > calling chat_callback")
            self.chat_callback(self.answer)

    @override
    def on_end(self):
        """Run any tool calls the finished stream left pending and stream the follow-up."""
        run = self.current_run
        # No run was recorded on this handler, so there is nothing to act on.
        if run is None:
            return
        print(f"on_end > run status: {run.status}")
        if run.status != "requires_action":
            return
        tool_calls = run.required_action.submit_tool_outputs.tool_calls
        outputs = [
            {
                "output": self._call_tool_function(call.function),
                "tool_call_id": call.id,
            }
            for call in tool_calls
        ]
        follow_up = EventHandler(client=self.client, chat_callback=self.chat_callback)
        with self.client.beta.threads.runs.submit_tool_outputs_stream(
            run_id=run.id,
            thread_id=run.thread_id,
            tool_outputs=outputs,
            event_handler=follow_up,
        ) as stream:
            stream.until_done()
        # The reply that follows the tool outputs is streamed to the nested handler;
        # fold it back in so callers reading this handler's ``answer`` see it.
        self.answer += follow_up.answer

    def _call_tool_function(self, function):
        """Execute one requested function call and return its output as a string.

        Failures (unknown name, malformed arguments, or an exception raised by
        the tool) are returned as an error message rather than raised, so an
        output is still submitted for every tool call and the run is not left
        waiting in ``requires_action``.
        """
        print(f"on_end > calling required action: {function.name} with arg {function.arguments}")
        tool = functions_map.get(function.name)
        if tool is None:
            return f"Error: unknown function '{function.name}'"
        try:
            return str(tool(json.loads(function.arguments)))
        except Exception as error:  # deliberately broad: reported back to the model
            print(f"on_end > {function.name} failed: {error}")
            return f"Error while running {function.name}: {error}"

def event_handler_factory(client, chat_callback = None):
    """Create a new ``EventHandler`` bound to ``client`` and ``chat_callback``.

    Args:
        client: Client object forwarded to the handler.
        chat_callback: Optional callback forwarded to the handler.

    Returns:
        The newly constructed ``EventHandler``.
    """
    handler = EventHandler(client=client, chat_callback=chat_callback)
    return handler

class Assistant:
    """Thin wrapper around one OpenAI Assistants conversation thread.

    Creating an instance creates a new thread; each ``query`` call adds a user
    message to that thread and streams the assistant's run.

    Args:
        assistant_id: ID of the assistant to run.
        event_handler_factory: Callable accepting ``client`` and
            ``chat_callback`` keyword arguments and returning a new stream
            event handler that exposes an ``answer`` attribute.
    """

    def __init__(self, assistant_id, event_handler_factory):
        self.client = OpenAI()
        self.assistant_id = assistant_id
        self.event_handler_factory = event_handler_factory
        thread = self.client.beta.threads.create()
        self.thread_id = thread.id

    def query(self, content, chat_callback=None):
        """Send ``content`` as a user message and stream the run to completion.

        Args:
            content: Text of the user message.
            chat_callback: Optional callback handed to the event handler.

        Returns:
            The ``answer`` attribute of the event handler created for this call.
        """
        event_handler = self.event_handler_factory(client=self.client, chat_callback=chat_callback)
        # Stream through this instance's own client (the one that created the thread)
        # rather than a notebook-level global that may not exist.
        with self.client.beta.threads.runs.stream(
            thread_id=self.thread_id,
            assistant_id=self.assistant_id,
            event_handler=event_handler,
            additional_messages=[{
                "role": "user",
                "content": content
            }],
        ) as stream:
            stream.until_done()
        return event_handler.answer

    def get_messages(self, thread_id=None):
        """Return the thread's messages as ``"role: text"`` strings.

        Args:
            thread_id: Thread to read; defaults to this instance's own thread.

        Returns:
            One string per message, in the reverse of the order the API listed
            them. Only text parts contribute; a message without any text part
            yields ``"role: "``.
        """
        if thread_id is None:
            thread_id = self.thread_id
        messages = list(self.client.beta.threads.messages.list(thread_id=thread_id))
        messages.reverse()
        result = []
        for message in messages:
            # Content may be empty or hold non-text parts (which have no ``text``).
            text_parts = [
                part.text.value
                for part in message.content
                if getattr(part, "text", None) is not None
            ]
            result.append(f"{message.role}: " + "\n".join(text_parts))
        return result

# Open a conversation thread for the configured assistant.
assistant = Assistant(assistant_id, event_handler_factory)

# Collects the text passed to the callback each time a message finishes.
chat_outputs = []


def chat_callback(answer):
    """Record one reported answer in ``chat_outputs``."""
    chat_outputs.append(answer)


answer = assistant.query(
    "I want to know about the path of exile game.",
    chat_callback,
)

# To inspect the whole conversation: assistant.get_messages(assistant.thread_id)
# To see only the returned text: print(answer)
print(chat_outputs)

on_tool_call_done > run status: requires_action
on_end > calling required action: search_by_wikipedia with arg {"query": "Path of Exile"}
on_end > calling required action: search_by_duckduckgo with arg {"query": "Path of Exile game overview"}


  ddgs_gen = ddgs.text(


on_tool_call_done > run status: requires_action
on_end > calling required action: scrape_web_page with arg {"url": "https://www.pathofexile.com/game"}
on_end > calling required action: scrape_web_page with arg {"url": "https://www.poewiki.net/wiki/Path_of_Exile"}
on_end > calling required action: scrape_web_page with arg {"url": "https://www.exitlag.com/blog/intro-to-path-of-exile-game-guide/"}
on_tool_call_done > run status: requires_action
on_end > calling required action: save_to_file with arg {"text":"# Path of Exile Game Overview\n\n## General Information\nPath of Exile is a free-to-play action role-playing video game developed and published by Grinding Gear Games. The game was officially released on October 23, 2013, after a successful open beta phase. It is available on multiple platforms including Microsoft Windows, macOS, Xbox One, PlayStation 4, and PlayStation 5. A sequel, Path of Exile 2, is set to release in early access on December 6, 2024.\n\n## Setting and Story\nThe ga

In [40]:
# Second question through the same Assistant instance; callback results keep
# accumulating in chat_outputs.
answer = assistant.query(
    "I want to know about the house of cards tv show.",
    chat_callback,
)

print(chat_outputs)

on_tool_call_done > run status: requires_action
on_end > calling required action: search_by_wikipedia with arg {"query": "House of Cards (TV series)"}




  lis = BeautifulSoup(html).find_all('li')
  ddgs_gen = ddgs.text(


on_end > calling required action: search_by_duckduckgo with arg {"query": "House of Cards TV show overview"}
on_tool_call_done > run status: requires_action
on_end > calling required action: scrape_web_page with arg {"url": "https://en.wikipedia.org/wiki/House_of_Cards_(American_TV_series)"}
on_end > calling required action: scrape_web_page with arg {"url": "https://houseofcards.fandom.com/wiki/House_of_Cards_(US_series)"}
on_tool_call_done > run status: requires_action
on_end > calling required action: save_to_file with arg {"text":"# House of Cards TV Show Overview\n\n## General Information\n**House of Cards** is an American political thriller television series created by Beau Willimon. It is based on the 1989 novel of the same name by Michael Dobbs and is an adaptation of the 1990 British series of the same name. The first season, consisting of 13 episodes, was released on February 1, 2013, on Netflix, making it the first original series produced by a studio for the streaming servic