In [1]:
import re
import time
import traceback
import threading
from io import StringIO
from contextlib import redirect_stdout
from IPython.display import clear_output

from tools import WebSearch, WebBrowser, python_runner
from assistant import OpenAIAssistant

# Setup

In [2]:
# OpenAIAssistant configuration
# With long term memory, Qdrant must be running on localhost:6333, e.g.:
# docker run -p 6333:6333 -v <your_save_dir_here>:/qdrant/storage qdrant/qdrant
api_key = ''
system_prompt = None
debug = False

# Feature switches
use_long_term_memory = False
use_short_term_memory = False
use_knowledge_retrieval = False
summarize_short_term_memory = False
summarize_long_term_memory = False
summarize_knowledge_retrieval = False

# Token budgets
short_term_memory_max_tokens = 750
long_term_memory_max_tokens = 500
knowledge_retrieval_max_tokens = 1000
short_term_memory_summary_max_tokens = 300
long_term_memory_summary_max_tokens = 300
knowledge_retrieval_summary_max_tokens = 600

long_term_memory_collection_name = 'long_term_memory'

# One keyword per line so individual settings are easy to find and diff.
assistant = OpenAIAssistant(
    api_key,
    system_prompt=system_prompt,
    debug=debug,
    memory_manager=None,
    long_term_memory_collection_name=long_term_memory_collection_name,
    use_long_term_memory=use_long_term_memory,
    use_short_term_memory=use_short_term_memory,
    use_knowledge_retrieval=use_knowledge_retrieval,
    summarize_short_term_memory=summarize_short_term_memory,
    summarize_long_term_memory=summarize_long_term_memory,
    summarize_knowledge_retrieval=summarize_knowledge_retrieval,
    short_term_memory_max_tokens=short_term_memory_max_tokens,
    long_term_memory_max_tokens=long_term_memory_max_tokens,
    knowledge_retrieval_max_tokens=knowledge_retrieval_max_tokens,
    short_term_memory_summary_max_tokens=short_term_memory_summary_max_tokens,
    long_term_memory_summary_max_tokens=long_term_memory_summary_max_tokens,
    knowledge_retrieval_summary_max_tokens=knowledge_retrieval_summary_max_tokens,
)
search = WebSearch()
browser = WebBrowser()

In [3]:
def truncate_text(assistant, text, max_length=500, side='right'):
    """Cut ``text`` down to at most ``max_length`` tokens.

    Args:
        assistant: Object exposing ``enc`` with ``encode(text)`` and
            ``decode(tokens)``; the token sequence is sliced here.
        text: The string to shorten.
        max_length: Maximum number of tokens to keep. Zero or negative
            values keep nothing.
        side: ``'right'`` drops tokens from the end (keeps the beginning);
            any other value drops tokens from the start (keeps the end).

    Returns:
        The decoded text of the kept tokens.
    """
    tokens = assistant.enc.encode(text)
    keep = max(max_length, 0)
    if side == 'right':
        kept = tokens[:keep]
    elif keep >= len(tokens):
        kept = tokens
    else:
        # Explicit start index: tokens[-0:] would be the whole sequence,
        # so a zero budget must not be expressed as a negative slice.
        kept = tokens[len(tokens) - keep:]
    return assistant.enc.decode(kept)

# (lower-cased action name, raw action input) -> observation text already produced
cache = {}


def _find_last_action(text):
    """Return ``(action, action_input)`` for the last Action/Action Input pair in ``text``, or ``None``."""
    # Each capture is confined to its own line, so the action name can never
    # pick up a later line; findall + [-1] selects the most recent pair.
    pattern = r'Action: ([^\n]*)\n\s*Action Input: ([^\n]*)$'
    pairs = re.findall(pattern, text, re.MULTILINE)
    if not pairs:
        return None
    action, action_input = pairs[-1]
    return action.strip(), action_input.strip()


def parse_action(text, max_tokens=500):
    """Execute the tool call named by the last Action/Action Input pair in ``text``.

    Args:
        text: Model output containing an ``Action: <name>`` line followed by
            an ``Action Input: <value>`` line.
        max_tokens: Token budget handed to ``truncate_text`` for the
            observation (and the threshold above which readable content is
            summarized).

    Returns:
        The observation as a string. Problems are reported as strings instead
        of being raised: a format reminder when no pair is found,
        ``'Action does not exist'`` for an unknown action, and
        ``'{error: ...}'`` when anything raises while running the tool.
        Results of content-producing actions are memoized in ``cache``.
    """
    try:
        parsed = _find_last_action(text)
        if parsed is None:
            return 'Invalid action format. Remember to include "Action:" and "Action Input:"'
        last_action, last_action_input = parsed
        action = last_action.lower()

        # Build the key once, before any branch rewrites the input, so the
        # lookup and the store always agree (quoted searches included).
        cache_key = (action, last_action_input)
        if cache_key in cache:
            return cache[cache_key]

        # Navigation actions have no content to cache and need no model.
        if action == 'go back':
            print('Going back...')
            browser.back()
            return 'Done'
        if action == 'go forward':
            print('Going forward...')
            browser.forward()
            return 'Done'

        content_actions = (
            'web search', 'open url', 'ask chatgpt', 'get readable content',
            'get internal links', 'get external links', 'parse page', 'run python code',
        )
        if action not in content_actions:
            return 'Action does not exist'

        # Only built once a real tool is about to run; its encoder is used for
        # truncation and it also answers 'ask chatgpt' / summarization requests.
        chatgpt = OpenAIAssistant(api_key, system_prompt=None, use_long_term_memory=False, use_short_term_memory=False, memory_manager=None, use_knowledge_retrieval=False)

        if action == 'web search':
            query = last_action_input.strip('\"')  # remove quotes to receive better search results
            print('Searching for: ' + query + '...')
            results = search.search(keywords=query, safesearch='Off', time=None, max_results=10, cache=True)
            out = '{'
            for result in results:
                out += 'title: ' + result['title'] + ',\n\tbody: ' + result['body'] + ',\n\t' + 'url: ' + result['href'] + ',\n\t'
            content = truncate_text(chatgpt, out.strip(), max_tokens) + '}'
        elif action == 'open url':
            print('Opening URL: ' + last_action_input + '...')
            contents = browser.parse_page(url=last_action_input)
            # Last character is swapped for '}' so a truncated dump still looks closed.
            content = truncate_text(chatgpt, str(contents), max_tokens)[:-1] + '}'
        elif action == 'ask chatgpt':
            print('Asking ChatGPT: ' + last_action_input + '...')
            response = chatgpt.get_chat_response(last_action_input)
            content = truncate_text(chatgpt, response.choices[0].message.content.strip(), max_tokens)
        elif action == 'get readable content':
            print(f'Getting readable content for {last_action_input}...')
            contents = str(browser.get_readable_content(url=last_action_input))
            if chatgpt.calculate_num_tokens(contents) > max_tokens:
                summarize_prompt = 'Summarize the following text while trying to stay under 500 words. Include all important and relevant information:\n{text}'
                max_tokens_for_prompt = 3500
                summary_prompt = summarize_prompt.format(text=contents)
                # Trim to max_tokens_for_prompt to add padding for the response
                summary_prompt = chatgpt.enc.decode(chatgpt.enc.encode(summary_prompt)[:max_tokens_for_prompt])
                contents = '{summarized content: ' + chatgpt.get_chat_response(summary_prompt).choices[0].message.content.strip() + '}'
            # NOTE(review): dropping the last character presumes `contents`
            # ends with a closing brace even when it was not summarized - confirm.
            content = truncate_text(chatgpt, contents[:-1], max_tokens) + '}'
        elif action == 'get internal links':
            print(f'Getting internal links for {last_action_input}...')
            contents = browser.get_internal_links(url=last_action_input)['internal_links']
            content = truncate_text(chatgpt, str(contents), max_tokens)[:-1] + ']'
        elif action == 'get external links':
            print(f'Getting external links for {last_action_input}...')
            contents = browser.get_external_links(url=last_action_input)['external_links']
            content = truncate_text(chatgpt, str(contents), max_tokens)[:-1] + ']'
        elif action == 'parse page':
            print(f'Parsing page: {last_action_input}...')
            contents = browser.parse_page(url=last_action_input)
            content = truncate_text(chatgpt, str(contents), max_tokens)
        else:  # 'run python code'
            print(f'Running Python code: {last_action_input}...')
            content = truncate_text(chatgpt, python_runner(last_action_input.strip('"')).strip(), max_tokens)

        cache[cache_key] = content
        return content
    except Exception as e:
        # return the error message so the model sees it as the observation
        return '{error: ' + str(e) + '}'
    

def parse_action_with_timer(text, max_tokens=500, timeout=120.0):
    """Run ``parse_action(text, max_tokens)`` in a worker thread with a time limit.

    Args:
        text: Model output forwarded to ``parse_action``.
        max_tokens: Observation token budget forwarded to ``parse_action``.
        timeout: Seconds to wait for the worker before giving up.

    Returns:
        The value produced by ``parse_action``; ``'{error: ...}'`` if the
        worker raised; or ``'Error: Function execution timed out.'`` when the
        worker has not finished within ``timeout`` seconds.
    """
    result = {'value': None}
    finished = threading.Event()

    def target():
        try:
            result['value'] = parse_action(text, max_tokens)
        except Exception as e:
            # Surface the failure right away instead of letting the caller
            # wait out the whole timeout and misreport it as a timeout.
            result['value'] = '{error: ' + str(e) + '}'
        finally:
            finished.set()

    # Daemon thread: a worker stuck past the timeout must not keep the
    # interpreter alive at shutdown.
    worker = threading.Thread(target=target, daemon=True)
    worker.start()

    if not finished.wait(timeout):
        return 'Error: Function execution timed out.'
    return result['value']

def web_search_with_timer(text, timeout=120.0):
    """Run a web search for ``text`` in a worker thread with a time limit.

    Args:
        text: Search keywords passed to ``search.search``.
        timeout: Seconds to wait for the worker before giving up.

    Returns:
        A ``'{title: ..., body: ..., url: ...}'`` listing built from the
        search results; ``'{error: ...}'`` if the search raised; or
        ``'Error: Function execution timed out.'`` when the worker has not
        finished within ``timeout`` seconds.
    """
    result = {'value': None}
    finished = threading.Event()

    def target():
        try:
            hits = search.search(keywords=text, safesearch='Off', time=None, max_results=10, cache=True)
            out = '{'
            for hit in hits:
                out += 'title: ' + hit['title'] + ',\n\tbody: ' + hit['body'] + ',\n\t' + 'url: ' + hit['href'] + ',\n\t'
            result['value'] = out.strip() + '}'
        except Exception as e:
            # Same error shape parse_action uses, reported immediately rather
            # than after the caller has waited out the full timeout.
            result['value'] = '{error: ' + str(e) + '}'
        finally:
            finished.set()

    # Daemon thread: a search stuck past the timeout must not keep the
    # interpreter alive at shutdown.
    worker = threading.Thread(target=target, daemon=True)
    worker.start()

    if not finished.wait(timeout):
        return 'Error: Function execution timed out.'
    return result['value']

In [4]:
def action_loop(assistant, user_prompt, stop=['Observation:'], max_observation_tokens=500, memory_tokens=3000, max_loops=6, sleep=5):
    """Drive the Thought/Action/Observation cycle until a final answer appears.

    The model is asked for a step, the requested action is executed with
    ``parse_action_with_timer``, and the result is fed back as the
    observation. The running transcript is re-sent as an injected assistant
    message on every turn.

    Args:
        assistant: Object providing ``get_chat_response`` and
            ``calculate_num_tokens`` (and ``enc`` for ``truncate_text``).
        user_prompt: Full prompt for the task.
        stop: Stop sequences passed to every ``get_chat_response`` call.
        max_observation_tokens: Token budget for each observation.
        memory_tokens: Token budget for the running transcript.
        max_loops: Loop budget; the loop body runs at most ``max_loops + 1``
            times (once only when ``max_loops`` is zero or negative).
        sleep: Seconds to pause before each tool call after the first.

    Returns:
        The text after the last ``'Final Answer: '`` in the final response,
        or the whole final response when no final answer was produced.
    """
    response = assistant.get_chat_response(prompt=user_prompt, stop=stop)
    clear_output(wait=True)
    print(response.choices[0].message.content.strip())

    # One complete, non-final step. DOTALL lets an observation span several
    # lines; the lookahead requires a following Thought with content, so the
    # most recent step (ending in a bare 'Thought:') is never removed whole.
    oldest_step = re.compile(
        r'Thought:.*?\nAction:.*?\nAction Input:.*?\nObservation:.*?\n(?=Thought:\s*\S)',
        re.DOTALL,
    )

    # action loop
    next_message = ''
    counter = max_loops
    while True:
        latest = response.choices[0].message.content
        # Check before sleeping so a finished run returns without extra delay.
        if 'Final Answer: ' in latest:
            break
        if counter < max_loops:
            time.sleep(sleep)
        action_result = parse_action_with_timer(latest, max_tokens=max_observation_tokens)
        if next_message != '':
            next_message = next_message + ' '
        next_message += latest.strip() + '\nObservation: ' + action_result + '\nThought:'

        removals_left = min(max_loops, 10)
        while assistant.calculate_num_tokens(next_message) > memory_tokens:
            # Remove the oldest entry
            trimmed = oldest_step.sub('', next_message, count=1)
            removals_left -= 1
            if trimmed == next_message or removals_left <= 0:
                # Nothing more can be removed step-wise (or the removal
                # budget is used up): keep only the newest tokens.
                next_message = truncate_text(assistant, trimmed, memory_tokens, side='left')
                break
            next_message = trimmed

        # get the next action
        response = assistant.get_chat_response(prompt=None, stop=stop, inject_messages=[{0: {'role': 'user', 'content': user_prompt}}, {1: {'role': 'assistant', 'content': next_message}}])

        clear_output(wait=True)
        print(next_message + response.choices[0].message.content.strip())
        # '<=' so a zero or negative max_loops still terminates.
        if counter <= 0:
            break
        counter -= 1

    return response.choices[0].message.content.split('Final Answer: ')[-1].strip()

In [5]:
# Instruction template for the Thought/Action/Observation loop. The trailing
# {prompt} placeholder is filled with the task via prompt.format(prompt=...).
# Only five tools are advertised to the model here; parse_action also
# recognizes further action names that this template does not mention.
prompt = """Respond to the following prompt as best as you can. You have access to the following tools:
Web Search: Searches the web for the given search query.
Get Readable Content: Returns the readable content of the given url.
Get Internal Links: Returns the internal links of the given url.
Run Python Code: Runs the given Python code. Must be one line.
Ask ChatGPT: Ask ChatGPT a question or give it a prompt for a response.
Use the following format:
Prompt: the prompt you are responding to
Thought: you should always think about what to do
Action: the action you want to take, must be one of [Web Search, Get Readable Content, Get Internal Links, Run Python Code, Ask ChatGPT]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Observation loop can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
Begin!
Prompt: {prompt}"""

# Action Loop

In [6]:
# Example: the task text is kept in its own variable, then wrapped in the
# tool-use template and handed to the action loop.
news_request = """Find the latest in ai news and then give me a 500-word summary. No python code allowed.
Final Answer must be in the following format:
Final Answer: <h1>...</h1>
<p>...</p>
<h2>...</h2>
<p>...</p>
..."""
latest_news_summary = action_loop(assistant, prompt.format(prompt=news_request))

Thought: I need to find a reliable source for AI news and then summarize it into a 500-word format.
Action: Web Search
Action Input: "latest AI news"
Observation: {title: Artificial Intelligence News -- ScienceDaily,
	body: Artificial Intelligence News March 23, 2023 Top Headlines Head-Worn Device Can Control Mobile Manipulators Mar. 20, 2023 — New research aims to increase autonomy for individuals with such motor...,
	url: https://www.sciencedaily.com/news/computers_math/artificial_intelligence/,
	title: Artificial intelligence | MIT News | Massachusetts Institute of Technology,
	body: Integrating humans with AI in structural design A process that seeks feedback from human specialists proves more effective at optimization than automated systems working alone. March 2, 2023 Read full story MIT-Takeda Program heads into fourth year with crop of 10 new projects,
	url: https://news.mit.edu/topic/artificial-intelligence2,
	title: AI News - Artificial Intelligence News,
	body: Latest News E

In [7]:
# Print the final answer returned by action_loop for the example task.
print(latest_news_summary)

<h1>Latest Developments in AI</h1>
<p>Artificial intelligence has been making headlines lately with rapid development across various industries. The latest developments are significant and indicate the potential for AI to drive innovation and revolutionise businesses.</p>
<h2>AI in the UK and the US</h2>
<p>The UK government’s recent Spring Budget presented significant support for the country’s AI industry. Already Europe’s leader in AI after the US and China, businesses in the UK are set to benefit from the budget. However, Microsoft faced criticism after layoffs left fewer experts on ethical and responsible AI development in the company.</p>
<p>In the US, AI is being explored as a way to improve people’s lives. The US and the EU recently signed an agreement to use AI to explore areas that can benefit from a more focused AI breakthrough. However, there are also concerns about the national security threat posed by China's unconstrained AI program.</p>
<h2>AI in User Experience</h2>
<p>