In [67]:
import openai as client
import json
import requests
from bs4 import BeautifulSoup
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.retrievers import WikipediaRetriever

In [138]:
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the contents joined by two newlines
        (an empty string when ``docs`` is empty).
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

def duckduckgo_search(inputs):
    """Run a DuckDuckGo search for ``inputs['query']``.

    Args:
        inputs: Mapping with a ``'query'`` entry (the decoded tool-call arguments).

    Returns:
        Whatever ``DuckDuckGoSearchAPIWrapper.run`` returns for the query.
    """
    searcher = DuckDuckGoSearchAPIWrapper()
    return searcher.run(inputs['query'])

def wikipedia_search(inputs):
    """Look up ``inputs['query']`` on English Wikipedia and return the text found.

    Args:
        inputs: Mapping with a ``'query'`` entry (the decoded tool-call arguments).

    Returns:
        The retrieved documents' contents merged into one string by ``format_docs``.
    """
    search_term = inputs['query']
    # The retriever is configured for up to 5 English-language results.
    wiki = WikipediaRetriever(top_k_results=5, lang="en")
    return format_docs(wiki.get_relevant_documents(search_term))

def website_scrape(inputs, timeout=10):
    """Download a web page and return the text of its paragraphs.

    Args:
        inputs: Mapping with a ``'url'`` entry (the decoded tool-call arguments).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an unresponsive
            host, which would stall the whole tool-call loop.

    Returns:
        The text of every ``<p>`` tag joined by single spaces, or a short
        error description when the page could not be fetched, so the caller
        receives a usable string instead of an exception or the markup of an
        HTTP error page.
    """
    url = inputs['url']
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures rather than scraping the error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"Failed to fetch {url}: {exc}"
    soup = BeautifulSoup(response.text, 'html.parser')
    # Extract the text of every <p> tag and combine it.
    content = ' '.join(p.text for p in soup.find_all('p'))
    return content

def save_to_file(inputs):
    """Write ``inputs['data']`` to ``search_result.txt`` in the working directory.

    The file is overwritten on every call and written as UTF-8, so text
    containing non-ASCII characters is saved regardless of the platform's
    default encoding.

    Args:
        inputs: Mapping with a ``'data'`` entry holding the text to save.

    Returns:
        A confirmation message naming the file that was actually written.
    """
    data = inputs['data']
    filename = "search_result.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(data)
    # Report the real file name; the message previously said "result.txt".
    return f"Information saved in {filename} file."


def _single_string_param_tool(name, description, param, param_description):
    """Build a function-tool schema that takes one required string parameter."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {
                    param: {
                        "type": "string",
                        "description": param_description,
                    }
                },
                "required": [param],
            },
        },
    }


# Tool schemas passed to the assistant; every tool accepts a single required
# string argument.
functions = [
    _single_string_param_tool(
        "duckduckgo_search",
        "Searches DuckDuckGo for the given query",
        "query",
        "The query to search for",
    ),
    _single_string_param_tool(
        "wikipedia_search",
        "Searches Wikipedia for the given query",
        "query",
        "The query to search for",
    ),
    _single_string_param_tool(
        "website_scrape",
        "Scrapes the content of the given website",
        "url",
        "The URL of the website to scrape",
    ),
    _single_string_param_tool(
        "save_to_file",
        "Saves the given data to a file",
        "data",
        "The data to save",
    ),
]

# Dispatch table from the tool name the assistant requests to the local
# function that implements it; each key is the function's own name.
functions_map = {
    tool.__name__: tool
    for tool in (
        duckduckgo_search,
        wikipedia_search,
        website_scrape,
        save_to_file,
    )
}

In [143]:
# Assistant creation call, kept commented out for reference. It registers the
# tool schemas in `functions` together with the research instructions below.
# Presumably it was run once and disabled so that re-running the notebook does
# not create another assistant -- confirm before uncommenting.
# assistant = client.beta.assistants.create(
#     name="Research Assistant",
#     instructions="""
#             You are a research specialist.
#             Take a deep breath and proceed with the following step.
#             1. Search for the information about a query using DuckDuckGo.
#             2. Search for the information about a query using Wikipedia.
#             3. Extract the content of a website if any url is included in the search result.
#             4. Save all the search results in a text file, and it should contain all the information you have found in step 1, 2, and 3.
#             """,
#     model="gpt-4-turbo-preview",
#     tools=functions,
# )

# assistant

# ID of the assistant used for every run in this notebook.
# NOTE(review): presumably the ID returned by the creation call above; it is
# account-specific, so replace it when running under a different account.
assistant_id = "asst_FH0WYs6IBxzqPWDhZmhvX2fn"

In [144]:
# Open a new thread seeded with the user's research request.
initial_request = {
    "role": "user",
    "content": "Research about the XZ backdoor",
}
thread = client.beta.threads.create(messages=[initial_request])

thread

Thread(id='thread_lPONvuP5DMhm5hHts18b2zn8', created_at=1713007947, metadata={}, object='thread')

In [145]:
def get_run(run_id, thread_id):
    """Retrieve the run identified by ``run_id`` on thread ``thread_id``.

    Returns:
        The run object returned by the API's ``runs.retrieve`` call.
    """
    runs_api = client.beta.threads.runs
    return runs_api.retrieve(run_id=run_id, thread_id=thread_id)


def get_tool_outputs(run_id, thread_id):
    """Execute every tool call the run is waiting on and collect the results.

    Each requested call is dispatched through ``functions_map`` with its
    JSON-decoded arguments. A tool name that is not in ``functions_map`` does
    not abort the batch: an error string is recorded as that call's output so
    the remaining calls still run and the result can be submitted.

    Args:
        run_id: ID of the run to inspect.
        thread_id: ID of the thread the run belongs to.

    Returns:
        A list of ``{"output": ..., "tool_call_id": ...}`` dicts, one per
        requested tool call, in the order the run listed them.

    Raises:
        ValueError: If the run has no pending required action (for example
            it is still queued or already completed), so there is nothing
            to execute.
    """
    run = get_run(run_id, thread_id)
    # required_action is None unless the run is paused waiting for tool output.
    if run.required_action is None:
        raise ValueError(
            f"Run {run_id} is not waiting for tool outputs (status: {run.status})"
        )
    outputs = []
    for action in run.required_action.submit_tool_outputs.tool_calls:
        function = action.function
        print(f"Calling function: {function.name} with arg {function.arguments}")
        handler = functions_map.get(function.name)
        if handler is None:
            output = f"Error: unknown function '{function.name}'"
        else:
            output = handler(json.loads(function.arguments))
        outputs.append(
            {
                "output": output,
                "tool_call_id": action.id,
            }
        )
    return outputs


def submit_tool_outputs(run_id, thread_id):
    """Run the tool calls requested by a run and send their results back.

    Args:
        run_id: ID of the run that requested the tool calls.
        thread_id: ID of the thread the run belongs to.

    Returns:
        The object returned by the API's ``runs.submit_tool_outputs`` call.
    """
    tool_results = get_tool_outputs(run_id, thread_id)
    runs_api = client.beta.threads.runs
    return runs_api.submit_tool_outputs(
        thread_id=thread_id,
        run_id=run_id,
        tool_outputs=tool_results,
    )


def get_messages(thread_id):
    """Print every message of a thread as ``role: text``.

    The messages are printed in the reverse of the order the API returns
    them (presumably newest-first from the API, so the printout reads
    chronologically -- confirm against the API's default ordering).

    Only text parts are printed. A message with no content, or whose parts
    carry no text (e.g. an image), prints just its role instead of raising.
    When a message has several text parts they are joined with newlines.

    Args:
        thread_id: ID of the thread whose messages are listed.
    """
    messages = list(client.beta.threads.messages.list(thread_id=thread_id))
    messages.reverse()
    for message in messages:
        text_parts = [
            part.text.value
            for part in message.content
            # Non-text content parts have no usable ``text`` attribute.
            if getattr(part, "text", None) is not None
        ]
        joined_text = "\n".join(text_parts)
        print(f"{message.role}: {joined_text}")


def send_message(thread_id, content):
    """Add a message with role ``"user"`` to the given thread.

    Args:
        thread_id: ID of the thread to post to.
        content: Text of the message.

    Returns:
        None; the created message object is discarded.
    """
    new_message = {"thread_id": thread_id, "role": "user", "content": content}
    client.beta.threads.messages.create(**new_message)

In [157]:
# Start a run of the assistant on the thread; the resulting Run is displayed.
run_request = {"thread_id": thread.id, "assistant_id": assistant_id}
run = client.beta.threads.runs.create(**run_request)
run


Run(id='run_FuhrSDcxGB1CpDufaqyvAbLA', assistant_id='asst_FH0WYs6IBxzqPWDhZmhvX2fn', cancelled_at=None, completed_at=None, created_at=1713008746, expires_at=1713009346, failed_at=None, file_ids=[], instructions='\n            You are a research specialist.\n            Take a deep breath and proceed with the following step.\n            1. Search for the information about a query using DuckDuckGo.\n            2. Search for the information about a query using Wikipedia.\n            3. Extract the content of a website if any url is included in the search result.\n            4. Save all the search results in a text file, and it should contain all the information you have found in step 1, 2, and 3.\n            ', last_error=None, metadata={}, model='gpt-4-turbo-preview', object='thread.run', required_action=None, started_at=None, status='queued', thread_id='thread_lPONvuP5DMhm5hHts18b2zn8', tools=[FunctionTool(function=FunctionDefinition(name='duckduckgo_search', description='Searches 

In [166]:
# Fetch the run again and show its current status (re-run this cell to poll).
get_run(run.id, thread.id).status


'completed'

In [165]:
# Execute the tool calls the run requested and send their outputs back.
# NOTE(review): this cell's execution count (165) is lower than the status
# cell's (166), so in the recorded session it ran before the status shown
# above was fetched.
submit_tool_outputs(run.id, thread.id)

Calling function: save_to_file with arg {"data":"DuckDuckGo Search Results:\n- Ethereum is a decentralized platform enabling smart contracts and powered by the cryptocurrency ether (ETH). Offers a comparison with Bitcoin, details on its proof-of-stake consensus mechanism, and notes on its applications and challenges.\n- Provides links to Ethereum's Whitepaper, latest price, trading information, and market updates.\n\nWikipedia Search Results:\nEthereum is a decentralized blockchain platform facilitating smart contract functionality, with Ether (ETH) as its native cryptocurrency. Founded in 2013 by Vitalik Buterin, Ethereum went live in 2015 and is now second only to Bitcoin in terms of market capitalization. It enables users to deploy permanent and immutable decentralized applications, with support for decentralized finance (DeFi), the creation and exchange of non-fungible tokens (NFTs), and the utilization of the ERC-20 token standard for other cryptocurrencies. Following the signific

Run(id='run_FuhrSDcxGB1CpDufaqyvAbLA', assistant_id='asst_FH0WYs6IBxzqPWDhZmhvX2fn', cancelled_at=None, completed_at=None, created_at=1713008746, expires_at=1713009346, failed_at=None, file_ids=[], instructions='\n            You are a research specialist.\n            Take a deep breath and proceed with the following step.\n            1. Search for the information about a query using DuckDuckGo.\n            2. Search for the information about a query using Wikipedia.\n            3. Extract the content of a website if any url is included in the search result.\n            4. Save all the search results in a text file, and it should contain all the information you have found in step 1, 2, and 3.\n            ', last_error=None, metadata={}, model='gpt-4-turbo-preview', object='thread.run', required_action=None, started_at=1713008779, status='queued', thread_id='thread_lPONvuP5DMhm5hHts18b2zn8', tools=[FunctionTool(function=FunctionDefinition(name='duckduckgo_search', description='Sea

In [167]:
# Print the conversation held on the thread so far.
get_messages(thread.id)

user: Research about the XZ backdoor
assistant: The research about the XZ backdoor has been successfully conducted, and the findings have been saved in the **result.txt** file. This includes details from both DuckDuckGo and Wikipedia searches, providing a comprehensive overview of the incident, technical details, and the broader implications.
user: Now do research about ethereum
assistant: The research on Ethereum has been completed and the findings have been saved in the **result.txt** file. This file contains an extensive summary of Ethereum's background, development stages, technical mechanisms, major upgrades, and its vital place in the cryptocurrency landscape.


In [156]:
# Add a follow-up user request to the same thread.
# NOTE(review): this cell's execution count (156) precedes the run-creation
# cell's (157), so in the recorded session it ran before that run was created;
# in a top-to-bottom execution it would run last, with no run following it.
send_message(thread.id, "Now do research about ethereum")
