In [2]:
import openai
import numpy as np
import json
import os

# Read the API key from the environment (None when OPENAI_API_KEY is unset)
# and hand it to the openai client module.
OPENAI_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_KEY

In [3]:
# with open("objects.txt") as f:
#     objects = [line.rstrip() for line in f]

# with open("actions.txt") as f:
#     actions = [line.rstrip() for line in f]

# 1. Given the task, ask the LLM which kinds of objects we might need.
# 2. Check whether every needed object is in sight (or how similar the visible objects are).
# 3. Tell the LLM about our limitations (if any).
# 4. Ask for instructions given the exact objects we have.

In [6]:
def _parse_scored_objects(reply):
    """Parse a ``<object>:<score>,<object>:<score>`` reply into ``(name, score)`` tuples.

    Tolerates the shapes the prompt itself can provoke: surrounding square
    brackets (the format spec in the prompt is written inside ``[...]``), a
    trailing comma (the first few-shot example ends with one) and stray
    whitespace around names and scores.

    Raises:
        ValueError: if a non-empty segment is not ``<name>:<integer>`` or the
            reply contains no objects at all.
    """
    body = reply.strip().strip("[]")
    pairs = []
    for segment in body.split(","):
        segment = segment.strip()
        if not segment:
            # Empty piece left by a leading/trailing/doubled comma.
            continue
        # Split on the LAST colon so the score is always the final field.
        name, sep, score = segment.rpartition(":")
        name = name.strip()
        try:
            if not sep or not name:
                raise ValueError("missing object name or score")
            pairs.append((name, int(score)))
        except ValueError as err:
            raise ValueError(f"improper format: {segment}") from err
    if not pairs:
        raise ValueError(f"improper format: {reply}")
    return pairs


# Given a task, returns the objects needed for it together with their importances.
def get_objects_needed(task, max_retries=10):
    """Ask the chat model which objects a task requires and how important each is.

    Args:
        task: Natural-language task description, e.g. ``"go to bed"``.
        max_retries: Maximum number of query attempts before giving up.

    Returns:
        A list of ``(object_name, importance)`` tuples with integer importances
        as reported by the model. Returns ``[]`` when ``task`` is empty or when
        every attempt failed (a ``RuntimeWarning`` describing the last error is
        emitted in the latter case).
    """
    import warnings

    if not task:
        return []

    def run_query():
        chat_lm_id = "gpt-3.5-turbo"
        # translation_lm_id = "text-embedding-ada-002"
        system_message = """
        You are a large language model tasked with planning a robot's actions.
        """
        def command(sample_task):
            return f"""
            list all objects i might need to interact with to perform the task '{sample_task}'? 
            only list necessary objects, as few as possible, even 1 object is possible.
            write each object in as few words as possible with no ambiguity. list each object in as few words as possible. 
            next to each object give a score from 1 to 10 scoring its importance for the given task in the format [<object>:<score>,<object>:<score>, etc.]
            """
        # Two few-shot exchanges followed by the real request.
        original_messages=[
        {"role": "system", "content": system_message},
            {"role": "user", "content": command("take out the trash")},
            {"role": "assistant", "content": """trash can:10,trash bag:6,dumpster:8,"""},
            {"role": "user", "content": command("do the dishes")},
            {"role": "assistant", "content": """dishes:10,sink:10,soap:9,sponge:5,drying rack:2"""},
            {"role": "user", "content": command(task)},
        ]
        res = openai.ChatCompletion.create(
            model=chat_lm_id,
            messages=original_messages,
            temperature=0.8
        )
        return _parse_scored_objects(res["choices"][0]["message"]["content"])

    # Sampling is non-deterministic (temperature=0.8), so a malformed reply is
    # simply retried. Catch Exception only: KeyboardInterrupt / SystemExit must
    # still be able to stop the loop.
    last_error = None
    for _ in range(max_retries):
        try:
            return run_query()
        except Exception as err:
            last_error = err

    # Keep the best-effort contract (empty list) but make the failure visible.
    warnings.warn(
        f"get_objects_needed({task!r}) failed after {max_retries} attempts: {last_error!r}",
        RuntimeWarning,
        stacklevel=2,
    )
    return []

In [10]:
# Try the helper on a sample task. This makes a live API call and the reply is
# sampled at temperature 0.8, so the output may differ between runs.
get_objects_needed("go to bed")

[('bed', 10), ('blanket', 6), ('pillow', 6)]

Now that we have the objects we need, the next step is to check whether similar objects are visible. We can detect the objects in the scene and compare their text embeddings to those of the objects we need.

If we see all the objects (similar beyond a threshold), we ask the model to generate a plan with these specific objects. 

Otherwise, we tell the model what we can't find and see if we can still perform the task. 

In [91]:
# Model identifiers. `translation_lm_id` is not referenced in this cell.
chat_lm_id = "gpt-3.5-turbo"
translation_lm_id = "text-embedding-ada-002"

# System prompt, spelled out line by line so that the leading/trailing newlines
# and the trailing space on the first sentence are explicit.
system_message = "\n".join([
    "",
    "You are a large language model for planning a robot's movements to achieve a given task. ",
    "The user will provide you with a task and you will instruct the user (a robot) to perform the task.",
    "The robot may move to an object, pick it up, or place it in a new location.",
    "The robot may also move to a location, and place an object there.",
    'Give a command in the form of "move to <object> and pick up <object> and place it in <object>".',
    "Objects: trash can, cup, table, trash, dumpster, table",
    "",
])

# Seed conversation: system prompt, one worked example (user task + assistant
# plan), then the task we actually want planned.
original_messages = [
    {"role": speaker, "content": text}
    for speaker, text in (
        ("system", system_message),
        ("user", "Take out the trash."),
        ("assistant", "\n".join([
            "",
            "    1. Move to the trash can",
            "    2. Pick up the trash",
            "    3. Move to the dumpster",
            "    4. Place the trash in the dumpster",
            "    ",
        ])),
        ("user", "You have 10 cups. Arrange them to form a circle. give step by step instructions for each cup"),
    )
]

In [11]:
# Extend the running conversation with a previous reply, then add a follow-up
# question from the user.
# NOTE(review): `res_message` is not defined in any cell visible here. It is
# presumably the message object of an earlier ChatCompletion response (it
# exposes `.to_dict()`); confirm, and restore the defining cell so the
# notebook survives Restart & Run All.
# NOTE(review): this mutates `original_messages` in place, so re-running the
# cell appends the same two messages again.
original_messages.append(res_message.to_dict())
original_messages.append({
    "role": "user",
    "content": "Where can I find the trash can. I am currently in the bathroom."
})

In [6]:
# Request a completion for the conversation currently held in
# `original_messages` (live API call, default sampling parameters).
res2 = openai.ChatCompletion.create(
    model=chat_lm_id,
    messages=original_messages
)