In [23]:
# imports and setup
import os
import re

import openai
from duckduckgo_search import ddg

# Read the key from the environment so it never has to be pasted into (and
# committed with) the notebook; falls back to '' when the variable is unset.
api_key = os.environ.get('OPENAI_API_KEY', '')
chat_model = 'gpt-3.5-turbo'
temperature = 1.0
top_p = 1.0
max_tokens = 2048 # completion token limit sent with each request (value taken from llama's max sequence length)
presence_penalty = 0.0
frequency_penalty = 0.0
logit_bias = {}

# change to None if you don't want to use any stop sequence
# ('Observation:' is the label the action loop writes itself before each tool result)
stop = ['Observation:']

openai.api_key = api_key

In [None]:
# This is an example of a chain-of-thought loop with tool usage (add support for whatever tool/API you want)

In [62]:
def parse_action(text):
    """Run the tool requested by the last Action / Action Input pair in ``text``.

    Args:
        text: Model output expected to contain a line ``Action: <tool>``
            followed by a line ``Action Input: <input>``.

    Returns:
        The tool's observation as a string, or ``None`` when no pair is found,
        the tool name is not recognised, or the tool call raises.

    Side effects:
        Prints a progress line before running a tool and a diagnostic line
        when a tool call fails.
    """
    try:
        # Match each Action / Action Input pair separately so that, when the
        # text holds several, the final pair can be picked reliably.
        pattern = r'Action:[ \t]*([^\n]*)\n\s*Action Input:[ \t]*([^\n]*)'
        pairs = re.findall(pattern, text)
        if not pairs:
            return None

        last_action, last_action_input = (part.strip() for part in pairs[-1])
        tool = last_action.lower()

        if tool == 'web search':
            print('Searching for: ' + last_action_input + '...')
            results = ddg(keywords=last_action_input, safesearch='Off', time=None, max_results=5)
            # a falsy result (None or empty) is treated as "no hits"
            entries = ''.join(
                'title: ' + result['title'] + ',\n\tbody: ' + result['body'] + ',\n\t'
                for result in (results or [])
            )
            return ('{' + entries).strip() + '}'

        if tool == 'calculator':
            print('Calculating: ' + last_action_input + '...')
            # rough example of a calculator
            # SECURITY: eval executes arbitrary Python written by the model;
            # swap in a real arithmetic parser before using this on untrusted output.
            # The result is converted to str so callers can concatenate it.
            return str(eval(last_action_input))

        return None
    except Exception as exc:
        # Best effort: a failing tool ends the action loop instead of crashing
        # the notebook, but the reason is reported rather than hidden.
        print('Tool call failed: ' + repr(exc))
        return None

In [55]:
# define prompt (adapted from gpt-4 paper)
# The tools named on the "Action:" line must be ones parse_action can run;
# an unknown tool makes parse_action return None, which ends the action loop.
# {question} is filled in with str.format for each question.
prompt = """Answer the following questions as best as you can. You have access to the following tools:
Web Search: Searches the web for the given search query.
Calculator: Performs basic arithmetic operations.
Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [Web Search, Calculator]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Observation loop can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
Begin!
Question: {question}"""

In [5]:
# questions fed to the loop below, one run per entry
# (replace with your own model inputs if applicable)
questions = [
    "How many moons are in the solar system?",
    "What is the capital of France?",
    "What is the latest ml research?",
]

In [60]:
# loop through questions

# Upper bound on tool calls per question so a model that keeps asking for
# actions cannot loop (and bill API calls) forever.
MAX_ACTION_STEPS = 10


def _request_completion(messages):
    """Send ``messages`` to the chat model with the sampling settings from the setup cell."""
    return openai.ChatCompletion.create(
        model=chat_model,
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        n=1,
        stream=False,
        stop=stop,
        max_tokens=max_tokens,
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        logit_bias=logit_bias,
        user=''
    )


responses = []
for question in questions:
    # Initial response
    user_messages = [{
        'role': 'user',
        'content': prompt.format(question=question)
    }]
    response = _request_completion(user_messages)

    # action loop: next_message accumulates the Thought/Action/Observation
    # transcript that is sent back as the assistant turn
    next_message = ''
    steps_taken = 0
    while True:
        reply = response.choices[0].message.content
        # once the step budget is spent, keep the reply as the final text
        # instead of running another tool
        action_result = parse_action(reply) if steps_taken < MAX_ACTION_STEPS else None
        if next_message != '':
            next_message = next_message + ' '
        if action_result is None:
            break
        # str() because a tool may return a non-string value (e.g. a number)
        next_message += reply.strip() + '\nObservation: ' + str(action_result) + '\nThought:'
        steps_taken += 1
        # get the next action
        response = _request_completion(
            [user_messages[0], {'role': 'assistant', 'content': next_message}]
        )

    responses.append({'question': question, 'answer': next_message + response.choices[0].message.content.strip()})

Searching for: "how many moons are in the solar system"...
Searching for: "how many moons are in the solar system in total"...


In [61]:
# show the full transcript recorded for the first question
first_record = responses[0]
print(first_record['answer'])

Thought: I don't think I know the answer to this question off the top of my head.
Action: Web Search
Action Input: "how many moons are in the solar system"
Observation: {title: Overview | Moons - NASA Solar System Exploration,
	body: How Many Moons Are There in the Solar System? The traditional moon count most people are familiar with stands at 226: One moon for Earth; Two for Mars; 95 at Jupiter; 83 at Saturn; 27 at Uranus; 14 at Neptune; and 5 for dwarf planet Pluto.According to NASA/JPLs Solar System Dynamics team, astronomers have documented another 462 moons orbiting smaller objects, such as asteroids, dwarf ...,
	title: How Many Moons? | NASA Space Place - NASA Science for Kids,
	body: Uranus and Neptune. Uranus has 27 moons that we know of. Some of them are half made of ice. Lastly, Neptune has 14 named moons. One of Neptunes moons, Triton, is as big as dwarf planet Pluto. To learn more about the moons in our solar system, visit the NASA Solar System Exploration moons page. arti

In [None]:
# You can then create the training dataset by combining each question and answer into one prompt in the data-loading portion of the training script.