In [1]:
from langchain.tools import DuckDuckGoSearchResults, WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
from langchain.document_loaders import WebBaseLoader
from pathlib import Path
from openai import OpenAI
import json
import os

def search_by_duckduckgo(inputs):
    """Run a DuckDuckGo search for the query carried in ``inputs``.

    Args:
        inputs: Mapping with a ``"query"`` entry holding the search text.

    Returns:
        Whatever ``DuckDuckGoSearchResults().run`` returns for that query.

    Raises:
        KeyError: If ``inputs`` has no ``"query"`` entry.
    """
    search_term = inputs["query"]
    return DuckDuckGoSearchResults().run(search_term)

def search_by_wikipedia(inputs):
    """Look up the query carried in ``inputs`` through the Wikipedia tool.

    Args:
        inputs: Mapping with a ``"query"`` entry holding the search text.

    Returns:
        Whatever ``WikipediaQueryRun.run`` returns for that query.

    Raises:
        KeyError: If ``inputs`` has no ``"query"`` entry.
    """
    search_term = inputs["query"]
    wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    return wikipedia_tool.run(search_term)

def scrape_web_page(inputs):
    """Load a web page and return its text content.

    Args:
        inputs: Mapping with a ``"url"`` entry naming the page to load.

    Returns:
        The ``page_content`` of every document produced by ``WebBaseLoader``
        for that URL, joined with blank lines.

    Raises:
        KeyError: If ``inputs`` has no ``"url"`` entry.
    """
    page_url = inputs["url"]
    documents = WebBaseLoader([page_url]).load()
    return "\n\n".join(document.page_content for document in documents)

def save_to_file(inputs):
    """Write the text carried in ``inputs`` to ``./output/output.txt``.

    The output directory is created when missing and an existing file is
    overwritten. The file is always written as UTF-8 so that non-ASCII
    characters do not depend on the platform's default encoding.

    Args:
        inputs: Mapping with a ``"text"`` entry holding the content to save.

    Returns:
        A confirmation message naming the file that was written.

    Raises:
        KeyError: If ``inputs`` has no ``"text"`` entry (raised before the
            filesystem is touched).
    """
    output_dir = "./output"
    # Read the payload first so a malformed call leaves no directory behind.
    text = inputs["text"]
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    target = f"{output_dir}/output.txt"
    # Explicit encoding: the locale default (e.g. cp1252) cannot represent
    # every character the text may contain.
    with open(target, "w", encoding="utf-8") as file:
        file.write(text)
    return f"Text saved to {target}"

# Registry used to dispatch a tool call by name: each tool function is keyed
# by its own function name.
functions_map = {
    tool.__name__: tool
    for tool in (
        search_by_duckduckgo,
        search_by_wikipedia,
        scrape_web_page,
        save_to_file,
    )
}

# Function-tool schemas, one per entry in the tool registry. Every tool takes
# a single required string argument, so the schemas are generated from a
# compact (name, description, argument, argument description) table.
functions = [
    {
        "type": "function",
        "function": {
            "name": tool_name,
            "description": tool_description,
            "parameters": {
                "type": "object",
                "properties": {
                    arg_name: {
                        "type": "string",
                        "description": arg_description,
                    },
                },
                "required": [arg_name],
            },
        },
    }
    for tool_name, tool_description, arg_name, arg_description in (
        (
            "search_by_duckduckgo",
            "Given a query, returns a URL of relevant websites from DuckDuckGo.",
            "query",
            "Query to search for",
        ),
        (
            "search_by_wikipedia",
            "Given a query, returns the summary of the query from Wikipedia.",
            "query",
            "Query to search for",
        ),
        (
            "scrape_web_page",
            "Given a URL, returns the content of the web page.",
            "url",
            "URL of the web page",
        ),
        (
            "save_to_file",
            "Given a text content, saves it to a file.",
            "text",
            "Text content to save",
        ),
    )
]

# System prompt for the research assistant. Written as explicit per-line
# literals so the leading newline, the four-space indent and the trailing
# spaces on two of the lines are visible rather than hidden in a block string.
instructions = (
    "\n"
    "    You are a research expert.\n"
    "    Your task is to use Wikipedia or DuckDuckGo to gather comprehensive and accurate information about the query provided. \n"
    "    When you find a relevant website through DuckDuckGo, you must scrape the content from that website. Use this scraped content to thoroughly research and formulate a detailed answer to the question. \n"
    "    Combine information from Wikipedia, DuckDuckGo searches, and any relevant websites you find. Ensure that the final answer is well-organized and detailed, and include citations with links (URLs) for all sources used.\n"
    "    Your research should be saved to a .txt file, and the content should match the detailed findings provided. Make sure to include all sources and relevant information.\n"
    "    The information from Wikipedia must be included.\n"
    "    Ensure that the final .txt file contains detailed information, all relevant sources, and citations.\n"
)

# API client, constructed with no explicit arguments.
client = OpenAI()

# Assistant creation is left commented out; the notebook reads the id of an
# existing assistant from the environment instead.
# assistant = client.beta.assistants.create(
#     name="Research Expert",
#     instructions=instructions,
#     model="gpt-4o-mini",
#     tools=functions,
# )

# None when OPENAI_ASSISTANT_ID is not set; printed so the value in use is visible.
assistant_id = os.getenv("OPENAI_ASSISTANT_ID")
print(assistant_id)

asst_yei8drdmPyp7k2jYkeLwqIIv


In [2]:
# Open a new thread seeded with the user's research question.
thread = client.beta.threads.create(
    messages=[
        dict(
            role="user",
            content="I want to know about the history of the house music.",
        ),
    ],
)

In [3]:
# Start a run of the configured assistant on the thread created above.
run = client.beta.threads.runs.create(
    assistant_id=assistant_id,
    thread_id=thread.id,
    temperature=0.1,
)

In [68]:
def get_run(run_id, thread_id):
    """Fetch the current state of a run.

    Args:
        run_id: Id of the run to retrieve.
        thread_id: Id of the thread the run belongs to.

    Returns:
        The object returned by ``client.beta.threads.runs.retrieve``.
    """
    runs_api = client.beta.threads.runs
    return runs_api.retrieve(thread_id=thread_id, run_id=run_id)

def send_message(thread_id, content):
    """Append a user message to a thread.

    Args:
        thread_id: Id of the thread to post to.
        content: Message content to send with the ``"user"`` role.

    Returns:
        The object returned by ``client.beta.threads.messages.create``.
    """
    messages_api = client.beta.threads.messages
    return messages_api.create(
        thread_id=thread_id,
        role="user",
        content=content,
    )

def get_messages(thread_id):
    """Print every message of a thread as ``role: text``.

    The listing is printed in the reverse of the order returned by the API.
    All text parts of a message are printed (joined by newlines); parts
    without text, and messages whose content is empty, no longer raise.

    Args:
        thread_id: Id of the thread whose messages are printed.

    Returns:
        None. The messages are written to standard output.
    """
    messages = list(client.beta.threads.messages.list(thread_id=thread_id))
    for message in reversed(messages):
        # A message may carry zero, one or several content parts, and not
        # every part is text; indexing content[0].text would raise on those.
        text_parts = [
            part.text.value
            for part in message.content
            if getattr(part, "type", None) == "text"
        ]
        body = "\n".join(text_parts) if text_parts else "[no text content]"
        print(f"{message.role}: {body}")

def get_tool_outputs(run_id, thread_id):
    """Execute the tool calls a run is waiting on and collect their outputs.

    Each requested call is looked up by name in ``functions_map`` and invoked
    with its JSON-decoded arguments. A call that fails (unknown tool name,
    malformed JSON arguments, or an exception inside the tool) does not abort
    the batch: the error text becomes that call's output, so the remaining
    results can still be submitted.

    Args:
        run_id: Id of the run to inspect.
        thread_id: Id of the thread the run belongs to.

    Returns:
        A list of ``{"output": str, "tool_call_id": ...}`` dicts, one per
        requested tool call. Empty when the run has no required action.
    """
    run = get_run(run_id, thread_id)
    required_action = run.required_action
    if required_action is None:
        # The run is not waiting on tool calls, so there is nothing to execute.
        return []

    outputs = []
    for action in required_action.submit_tool_outputs.tool_calls:
        function = action.function
        print(f"Calling function: {function.name} with arg {function.arguments}")
        try:
            result = functions_map[function.name](json.loads(function.arguments))
        except Exception as error:
            # Report the failure as this call's output instead of dropping the
            # whole batch, and echo it so it is visible in the notebook.
            result = f"Error while running {function.name}: {error!r}"
            print(result)
        outputs.append(
            {
                # Coerced to str so a non-string tool result is still submittable.
                "output": str(result),
                "tool_call_id": action.id,
            }
        )
    return outputs

def submit_tool_outputs(run_id, thread_id):
    """Run the pending tool calls of a run and send their outputs back.

    Args:
        run_id: Id of the run awaiting tool outputs.
        thread_id: Id of the thread the run belongs to.

    Returns:
        The object returned by ``client.beta.threads.runs.submit_tool_outputs``.
    """
    tool_outputs = get_tool_outputs(run_id, thread_id)
    runs_api = client.beta.threads.runs
    return runs_api.submit_tool_outputs(
        thread_id=thread_id,
        run_id=run_id,
        tool_outputs=tool_outputs,
    )

In [69]:
# Execute the tool calls the run is currently waiting on and submit the results.
submit_tool_outputs(run_id=run.id, thread_id=thread.id)

Calling function: save_to_file with arg {"text":"# History of House Music\n\nHouse music is a genre of electronic dance music that originated in the early 1980s in Chicago, Illinois. It is characterized by a repetitive four-on-the-floor beat and a tempo typically ranging from 115 to 130 beats per minute. The genre emerged from the underground club culture of Chicago, particularly among Black and LGBTQ+ communities, and was heavily influenced by disco, funk, and soul music.\n\n## Origins\nThe roots of house music can be traced back to the late 1970s, during a time when disco was at its peak. Clubs like the Warehouse and the Power Plant in Chicago became hotspots for DJs and music producers who sought to create a new sound that reflected the urban experience. Frankie Knuckles, often referred to as the \"Godfather of House Music,\" was a key figure in this scene, spinning records at the Warehouse and experimenting with mixing techniques and drum machines.\n\nThe term \"house music\" is be

Run(id='run_oHFLwzjWZQ0lt5rpqeTQ4Tka', assistant_id='asst_yei8drdmPyp7k2jYkeLwqIIv', cancelled_at=None, completed_at=None, created_at=1735476130, expires_at=1735476730, failed_at=None, incomplete_details=None, instructions='\n    You are a research expert.\n    Your task is to use Wikipedia or DuckDuckGo to gather comprehensive and accurate information about the query provided. \n    When you find a relevant website through DuckDuckGo, you must scrape the content from that website. Use this scraped content to thoroughly research and formulate a detailed answer to the question. \n    Combine information from Wikipedia, DuckDuckGo searches, and any relevant websites you find. Ensure that the final answer is well-organized and detailed, and include citations with links (URLs) for all sources used.\n    Your research should be saved to a .txt file, and the content should match the detailed findings provided. Make sure to include all sources and relevant information.\n    The information fr

In [65]:
# Show the run's current status.
get_run(run_id=run.id, thread_id=thread.id).status

'requires_action'

In [51]:
# Print the conversation held on the thread so far.
get_messages(thread_id=thread.id)

user: I want to know about the history of the house music.
