babyAGI from scratch

source:
1. https://community.openai.com/t/building-agent-from-scratch/240703/6
2. https://github.com/Troyanovsky/autonomous_agent_tutorial/blob/main/autonomous_agent_handson.ipynb
  * https://bootcamp.uxdesign.cc/a-comprehensive-and-hands-on-guide-to-autonomous-agents-with-gpt-b58d54724d50

TODO:
1. Create/use an API replay tool. Make it easy to replay API calls as it’s too slow to copy & paste these into the OpenAI playground.
2. Stream completions. Use the stream mode of the API to speed up dev cycles. You can quickly abort if a completion is off the rails.

Extensions:
* https://twitter.com/yoheinakajima/status/1666313838868992001


In [2]:
from dotenv import load_dotenv
load_dotenv(dotenv_path="../creds/.env")

True

In [3]:
from openai import AzureOpenAI

azure_endpoint = "https://cursor-gpt-4.openai.azure.com"
api_version="2024-02-15-preview"


client = AzureOpenAI(
        azure_endpoint=azure_endpoint,
        api_version=api_version,
    )

In [4]:
import time
import openai
# Goal: Design a simple AI Agent with no dependencies!
# This AI will NOT run forever.  It is also safe since it doesn't have API access beyond the OpenAI API.
#
# Usage: Just set your MainObjective, InitialTask, OPENAI_API_KEY at a minimum.
#
# Tips: Feel free to play with the temperature and run over and over for different answers.
#
# Inspired from BabyAGI: https://github.com/yoheinakajima/babyagi
# BabyAGI has many more features and bells and whistles.  But may be hard to understand for beginners.

# Goal configuration
MainObjective = "Become a machine learning expert." # overall objective
InitialTask = "Learn about tensors." # first task to research

# Note: As expected, GPT-4 gives much deeper answers.  But turbo is selected here as the default, so as there no cost surprises.
OPENAI_API_MODEL = "pjf-dpo-turbo-35" # use "gpt-4" or "gpt-3.5-turbo"
# deployment_name = "cursor-gpt-4"
# deployment_name = "pjf-dpo-turbo-35"

# Model configuration
OPENAI_TEMPERATURE = 0.7

# Max tokens that the model can output per completion
OPENAI_MAX_TOKENS = 1024


# print objective
print("*****OBJECTIVE*****")
print(f"{MainObjective}")


# dump task array to string
def dumpTask(task):
    d = "" # init
    for tasklet in task:
        d += f"\n{tasklet.get('task_name','')}"
    d = d.strip()
    return d


# inference using OpenAI API, with error throws and backoffs
def OpenAiInference(
    prompt: str,
    model: str = OPENAI_API_MODEL,
    temperature: float = OPENAI_TEMPERATURE,
    max_tokens: int = 1024,
):
    while True:
        try:
            # Use chat completion API
            response = "NOTHING"
            messages = [{"role": "system", "content": prompt}]
            
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                n=1,
                stop=None,
            )
            return response.choices[0].message.content.strip()
        except openai.error.RateLimitError:
            print(
                "   *** The OpenAI API rate limit has been exceeded. Waiting 10 seconds and trying again. ***"
            )
            time.sleep(10)  # Wait 10 seconds and try again
        except openai.error.Timeout:
            print(
                "   *** OpenAI API timeout occured. Waiting 10 seconds and trying again. ***"
            )
            time.sleep(10)  # Wait 10 seconds and try again
        except openai.error.APIError:
            print(
                "   *** OpenAI API error occured. Waiting 10 seconds and trying again. ***"
            )
            time.sleep(10)  # Wait 10 seconds and try again
        except openai.error.APIConnectionError:
            print(
                "   *** OpenAI API connection error occured. Check your network settings, proxy configuration, SSL certificates, or firewall rules. Waiting 10 seconds and trying again. ***"
            )
            time.sleep(10)  # Wait 10 seconds and try again
        except openai.error.InvalidRequestError:
            print(
                "   *** OpenAI API invalid request. Check the documentation for the specific API method you are calling and make sure you are sending valid and complete parameters. Waiting 10 seconds and trying again. ***"
            )
            time.sleep(10)  # Wait 10 seconds and try again
        except openai.error.ServiceUnavailableError:
            print(
                "   *** OpenAI API service unavailable. Waiting 10 seconds and trying again. ***"
            )
            time.sleep(10)  # Wait 10 seconds and try again
        finally:
            pass
            # print(f"Inference Response: {response}")

# expound on the main objective given a task
def ExpoundTask(MainObjective: str, CurrentTask: str):

    print(f"****Expounding based on task:**** {CurrentTask}")

    prompt=(f"You are an AI who performs one task based on the following objective: {MainObjective}\n"
            f"Your task: {CurrentTask}\nResponse:")


    # print("################")
    # print(prompt)
    response = OpenAiInference(prompt, OPENAI_API_MODEL, OPENAI_TEMPERATURE, OPENAI_MAX_TOKENS)
    new_tasks = response.split("\n") if "\n" in response else [response]
    return [{"task_name": task_name} for task_name in new_tasks]



# generate a bunch of tasks based on the main objective and the current task
def GenerateTasks(MainObjective: str, TaskExpansion: str):
    prompt=(f"You are an AI who creates tasks based on the following MAIN OBJECTIVE: {MainObjective}\n"
            f"Create tasks pertaining directly to your previous research here:\n"
            f"{TaskExpansion}\nResponse:")
    response = OpenAiInference(prompt, OPENAI_API_MODEL, OPENAI_TEMPERATURE, OPENAI_MAX_TOKENS)
    new_tasks = response.split("\n") if "\n" in response else [response]
    task_list = [{"task_name": task_name} for task_name in new_tasks]
    new_tasks_list = []
    for task_item in task_list:
        # print(task_item)
        task_description = task_item.get("task_name")
        if task_description:
            # print(task_description)
            task_parts = task_description.strip().split(".", 1)
            # print(task_parts)
            if len(task_parts) == 2:
                new_task = task_parts[1].strip()
                new_tasks_list.append(new_task)

    return new_tasks_list

# Simple version here, just generate tasks based on the inital task and objective, then expound with GPT against the main objective and the newly generated tasks.
q = ExpoundTask(MainObjective,InitialTask)
ExpoundedInitialTask = dumpTask(q)

q = GenerateTasks(MainObjective, ExpoundedInitialTask)

TaskCounter = 0
for Task in q:
    TaskCounter += 1
    print(f"#### ({TaskCounter}) Generated Task ####")
    e = ExpoundTask(MainObjective,Task)
    print(dumpTask(e))

*****OBJECTIVE*****
Become a machine learning expert.
****Expounding based on task:**** Learn about tensors.
#### (1) Generated Task ####
****Expounding based on task:**** Research and write a summary of the different data types that tensors can store in machine learning, including their significance and use cases.
In machine learning, tensors are multi-dimensional arrays that can store various types of data. Tensors are the fundamental building blocks of most machine learning frameworks, and they can store different types of data, including scalar, vector, and matrix data.

1. Scalar Data:
Scalars are single numerical values, such as integers or floating-point numbers, and they are represented as tensors with zero dimensions. Scalar tensors are commonly used to store constants or individual data points in machine learning algorithms. For example, scalar tensors can be used to represent bias values in neural networks or as individual data points in statistical analysis.

2. Vector Data

Now with function calling

In [5]:
# %pip install arxiv

Collecting arxiv
  Downloading arxiv-2.1.0-py3-none-any.whl.metadata (6.1 kB)
Collecting feedparser==6.0.10 (from arxiv)
  Downloading feedparser-6.0.10-py3-none-any.whl.metadata (2.3 kB)
Collecting sgmllib3k (from feedparser==6.0.10->arxiv)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25ldone
Downloading arxiv-2.1.0-py3-none-any.whl (11 kB)
Downloading feedparser-6.0.10-py3-none-any.whl (81 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.1/81.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... [?25ldone
[?25h  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6048 sha256=5b95e83d7a3adb82fbc9bde5f30937b0f0b190dfab4b2853a7cd69788bdfd53b
  Stored in directory: /Users/ogb/Library/Caches/pip/wheels/f0/69/93/a47e9d621be168e9e33c7ce60524393c0b92ae83cf6c6e89c5
Successfully built sgmllib3k
Install

In [19]:
import openai
import arxiv

"""
Wrap the OpenAI API call in this function
"""
def getResponse(prompt):
    response =  client.chat.completions.create(
            # model="pjf-dpo-turbo-35",
            model="cursor-gpt-4",
            temperature = 0, # We want consistent behavior, so we set a very low temperature
            messages=[
                {"role": "system", "content": "You're a helpful assistant. Carefully follow the user's instructions."},
                {"role": "user", "content": prompt}
            ]
        )
    
    # response = response['choices'][0]['message']['content']
    return response.choices[0].message.content.strip()

"""
Use GPT to determine the action to take by giving it the objective, memory, and tools.
If it think it has finished the objective, just give the answer.
If it needs more info, it will pick the tool to get the relevant information based on the tool description.
"""
def determineAction(objective, memory, tools):
    formattedPrompt = f"""Determine if the following memory is enough to answer\n
    the user's objective. Your past actions are stored in the memory for reference\n
    If it is enough, answer the question in the format: 'FINAL ANSWER: '. \n
    If the memory is not enough, you can use a tool in the available tools section\n
    to get more information. When using a tool you should use this format: \n
    'USE :'. If no tool can help you achieve the user's \n
    objective, then answer 'FINAL: CANNOT ANSWER'.

    ```Objective
    Answer: {objective}
    ```

    ```Memory
    {memory}
    ```

    ```Available Tools
    {tools}
    ```

    """
    response = getResponse(formattedPrompt)
    (finished, result, memory) = parseResponse(response, memory,tools)
    return (finished, result, memory)

"""
Parse the response from GPT to determine if the objective is finished.
If it is finished, just give the final answer.
If the objective cannot be finished with the context and tools, it will say it cannot answer
If GPT picks a tool, execute the tool and save the result of the tool in memory.
"""
def parseResponse(response, memory,tools):
    finished = False
    print("---RESPONSE---", response)
    # "USE : searchArxiv('ReAct reasoning and acting in language models')"

    if response.startswith('FINAL ANSWER:'):
        finished = True
        memory.append(response)
        return (finished, response, memory)
    elif response == 'FINAL: CANNOT ANSWER':
        finished = True
        memory.append(response)
        return (finished, response, memory)
    elif response.startswith('USE '):
        # split the string using ':' as the delimiter
        parsed_str = response.split(':')
        print("parsed string", parsed_str)
        #['USE ', " searchArxiv('React reasoning')"]


        # get the tool name and parameter
        # tool_name = parsed_str[1].split("(")[1]
        # parameter = parsed_str[1]

        tool_name = parsed_str[1].split("(")[0].strip()
        # tool_name = tool_name_with_extra.split("'")[0].strip()

        parameter_with_extra = parsed_str[1].split("(")[1]
        parameter = parameter_with_extra.split("'")[1].strip()

        print("Tool Name:", tool_name)
        print("Parameter:", parameter)

        print("THOUGHT: " + response)
        memory.append("THOUGHT: " + response)

        result = executeTool(tool_name, parameter,tools)

        new_memory = "OBSERVATION: " + str(result)
        print(new_memory)
        memory.append(new_memory)

        return (finished, result, memory)

"""
Execute the tool that GPT picks using the parameter it gives.
Returns the execution result so that GPT can have the relevant info.
"""
def executeTool(tool_name, parameter,tools):
    # Find the tool with the given name
    tool = None
    for t in tools:
        if t['tool_name'] == tool_name:
            tool = t
            break
    
    # If the tool is found, execute its function with the given parameter
    if tool:
        return tool['function_name'](parameter)
    else:
        return "Tool not found"


"""
Wrap the search arxiv function as a tool for GPT
Input is a search keyword
Output is a list of dictionaries with title, published date, authors, and summary of papers
"""
def searchArxiv(keyword):
    # Perform a search with the given query
    search = arxiv.Search(query=keyword, max_results=3)
    
    # Get the metadata for each result and extract relevant information
    results = []
    for result in search.results():
        title = result.title
        published_date = result.published.strftime("%Y-%m-%d")
        authors = ", ".join(author.name for author in result.authors)
        summary = result.summary
        
        # Store the extracted information as a dictionary
        results.append((
            "title: " + title,
            "published_date: " + published_date,
            "authors: " + authors,
            "summary: " + summary
        ))
    
    # Return the list of tuples containing the result information
    return results

"""
Initialize memory, tools for the GPT agent.
Ask for a user objective and let it run iteratively untill the objective is achieved.
As a safety measure, it will also stop after 5 iterations just in case things go wrong.
"""
def startAgent():
    objective = input("What is your research question? ")
    # For simplicity, we will just use a list to store every thing. 
    # For production, you will probably use vector databases.
    memory = []

    tools = [{'tool_name': 'searchArxiv', 
            'description': """You can use this tool to search for scientific papers on Arxiv. The response will have title, author, published date, and summary.""", 
            'function_name' : searchArxiv,
            'parameter': 'search key word'}]
    
    n = 0
    while True:
        (finished, result, memory) = determineAction(objective, memory, tools)
        n += 1

        if finished:
            print(result)
            return
        
        if n > 5:
            print("Ended for reaching limit.")
            return


startAgent()
# try: what is Reflexion for verbal reinforcement learning paper about?


---RESPONSE--- USE : searchArxiv('Reflexion for verbal reinforcement learning')
parsed string ['USE ', " searchArxiv('Reflexion for verbal reinforcement learning')"]
Tool Name: searchArxiv
Parameter: Reflexion for verbal reinforcement learning
THOUGHT: USE : searchArxiv('Reflexion for verbal reinforcement learning')


  for result in search.results():


OBSERVATION: [('title: Reflexion: Language Agents with Verbal Reinforcement Learning', 'published_date: 2023-03-20', 'authors: Noah Shinn, Federico Cassano, Edward Berman, Ashwin Gopinath, Karthik Narasimhan, Shunyu Yao', 'summary: Large language models (LLMs) have been increasingly used to interact with\nexternal environments (e.g., games, compilers, APIs) as goal-driven agents.\nHowever, it remains challenging for these language agents to quickly and\nefficiently learn from trial-and-error as traditional reinforcement learning\nmethods require extensive training samples and expensive model fine-tuning. We\npropose Reflexion, a novel framework to reinforce language agents not by\nupdating weights, but instead through linguistic feedback. Concretely,\nReflexion agents verbally reflect on task feedback signals, then maintain their\nown reflective text in an episodic memory buffer to induce better\ndecision-making in subsequent trials. Reflexion is flexible enough to\nincorporate various

TODO:
* More reliable output using Instructor (https://jxnl.github.io/instructor/)