In [36]:
import json
import os
from typing import Any, List, Optional, Tuple

import numpy as np
import openai

# Read the API key from the environment so it is never written into the notebook.
# os.getenv returns None when OPENAI_API_KEY is unset; nothing here checks for that.
OPENAI_KEY = os.getenv("OPENAI_API_KEY")
openai.api_key = OPENAI_KEY

In [37]:
# with open("objects.txt") as f:
#     objects = [line.rstrip() for line in f]

# Load the action templates the robot can perform, one template per line.
# The explicit encoding keeps the result independent of the platform default,
# and blank lines are skipped so they cannot turn into empty actions when the
# list is later joined into a prompt.
with open("actions.txt", encoding="utf-8") as f:
    actions = [line.rstrip() for line in f if line.strip()]

# Planned pipeline:
# 1. Given the task, ask the LLM which kinds of objects we might need.
# 2. Check whether every needed object is in sight (or how similar our objects are).
# 3. Tell the LLM our limitations (if any).
# 4. Ask for instructions given the exact objects we have.
actions

['move to <object>',
 'pick up <object>',
 'put down <object>',
 'put <object> on <object>']

In [38]:
def _parse_scored_objects(raw: str) -> List[Tuple[str, int]]:
    """Parse a reply of the form ``name:score,name:score`` into (name, score) pairs.

    Surrounding whitespace and square brackets are ignored, as are empty
    segments such as the one produced by a trailing comma. The score is the
    text after the last ``:`` of each segment.

    Raises:
        ValueError: if a segment has no name, no ``:`` separator or a
            non-integer score, or if the reply contains no segments at all.
    """
    cleaned = raw.strip().strip("[]")
    parsed = []
    for segment in cleaned.split(","):
        segment = segment.strip()
        if not segment:
            # Tolerate "a:1,b:2," and similar stray separators.
            continue
        name, separator, score = segment.rpartition(":")
        name = name.strip()
        if not separator or not name:
            raise ValueError(f"improper format: {segment}")
        try:
            parsed.append((name, int(score)))
        except ValueError as exc:
            raise ValueError(f"improper format: {segment}") from exc
    if not parsed:
        # An empty reply is as unusable as a malformed one; let the caller retry.
        raise ValueError(f"improper format: {raw}")
    return parsed


# Given a task, returns the objects needed for it together with their importances.
def get_objects_needed(task: str, max_attempts: int = 10) -> List[Tuple[str, int]]:
    """Ask the chat model which objects a task requires and how important each is.

    Args:
        task: Natural-language description of the task, e.g. "do the dishes".
        max_attempts: How many times to query the model before giving up when
            its reply cannot be parsed.

    Returns:
        A list of ``(object_name, importance)`` tuples with integer importances
        as given by the model, or ``[]`` when ``task`` is empty/blank or no
        reply could be parsed within ``max_attempts`` queries.

    Errors raised by the OpenAI client itself are not caught here.
    """
    if not task or not task.strip():
        return []

    def run_query():
        chat_lm_id = "gpt-3.5-turbo"
        system_message = """
        You are a large language model tasked with planning a robot's actions.
        """
        def command(sample_task):
            return f"""
            list all objects/agents i might need to interact with to perform the task '{sample_task}'? 
            only list necessary objects/agents, as few as possible, even 1 object is possible.
            write each object in as few words as possible with no ambiguity. list each object in as few words as possible. 
            next to each object give a score from 1 to 10 scoring its importance for the given task in the format [<object>:<score>,<object>:<score>, etc.]
            """
        # Few-shot examples. They must be in exactly the format the parser
        # expects, so no trailing comma after the last pair.
        original_messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": command("take out the trash")},
            {"role": "assistant", "content": """trash can:10,trash bag:6,dumpster:8"""},
            {"role": "user", "content": command("do the dishes")},
            {"role": "assistant", "content": """dishes:10,sink:10,soap:9,sponge:5,drying rack:2"""},
            {"role": "user", "content": command(task)},
        ]
        res = openai.ChatCompletion.create(
            model=chat_lm_id,
            messages=original_messages,
            temperature=0.8
        )
        return _parse_scored_objects(res["choices"][0]["message"]["content"])

    # Sampling is non-deterministic (temperature=0.8), so a malformed reply is
    # retried with a fresh query rather than treated as fatal.
    for _ in range(max_attempts):
        try:
            return run_query()
        except ValueError:
            continue

    return []

In [39]:
# Smoke-test the object query. The request samples with temperature=0.8, so the
# returned objects and scores may differ from run to run.
get_objects_needed("do the dishes")

[('dishes', 10), ('sink', 10), ('soap', 9), ('sponge', 5), ('drying rack', 2)]

In [46]:
# Given a task and a list of relevant objects that a robot has identified, ask for a task plan.
def get_steps_from_objects(
    task: str,
    objects: List[str],
    available_actions: Optional[List[str]] = None,
) -> Any:
    """Ask the chat model for a step-by-step plan restricted to the given objects.

    Args:
        task: Natural-language description of the task.
        objects: Names of the objects the robot may interact with.
        available_actions: Action templates the plan may use. Defaults to the
            module-level ``actions`` list when omitted.

    Returns:
        ``[]`` when ``task`` or ``objects`` is empty. Otherwise the unmodified
        response of ``openai.ChatCompletion.create``; the plan text is at
        ``response["choices"][0]["message"]["content"]``. The reply is not
        parsed or validated here.
    """
    if not task or not objects:
        return []

    # Resolve the action list once so both prompts below use the same one.
    allowed_actions = actions if available_actions is None else available_actions

    def run_query():
        chat_lm_id = "gpt-3.5-turbo"
        system_message = """
        You are a large language model tasked with planning a robot's actions.
        """
        def command(sample_task, sample_objects):
            return f"""
            List the steps I must take to perform the task '{sample_task}'? 
            I can only interact with the following objects: {", ".join(sample_objects)}.
            The actions I can perform are: {", ".join(allowed_actions)}
            Only use the actions and substitute the appropriate object in <object>
            Write each step in as few words as possible with little ambiguity
            """
        # One worked example shows the model the expected numbered-list format.
        original_messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": command("take out the trash", ["trash can", "trash bag", "dumpster"])},
            {"role": "assistant", "content": """
            1. move to trash can
            2. pick up trash bag
            3. move to dumpster
            4. put down trash bag"""},
            {"role": "user", "content": command(task, objects)}
        ]
        return openai.ChatCompletion.create(
            model=chat_lm_id,
            messages=original_messages,
            temperature=0.8
        )

    return run_query()

In [48]:
# Request a plan, then display the generated text from the first choice of the raw response.
res = get_steps_from_objects("do the dishes", ["sink", "dishes", "sponge", "drying rack"])
res["choices"][0]["message"]["content"]

'            1. move to sink\n            2. pick up dishes\n            3. put dishes in sink\n            4. put sponge on dishes\n            5. turn on water\n            6. wash dishes with sponge\n            7. turn off water\n            8. pick up dishes\n            9. put dishes in drying rack'

Now that we have the objects we need, our next task is to check whether we can see similar objects. We can find the objects in the scene and compare their text embeddings to those of the objects we need.

If we see all the objects (similar beyond a threshold), we ask the model to generate a plan with these specific objects. 

Otherwise, we tell the model what we can't find and see if we can still perform the task. 

In [11]:
# original_messages.append(res_message.to_dict())
# original_messages.append({
#     "role": "user",
#     "content": "Where can I find the trash can. I am currently in the bathroom."
# })

In [6]:
# res2 = openai.ChatCompletion.create(
#     model=chat_lm_id,
#     messages=original_messages
# )