## CoELA Baseline

Refer to this [paper](https://arxiv.org/abs/2307.02485).

In [3]:
import re
import base64
import requests
import json, os
import pandas as pd
from typing import List
import sys
from pathlib import Path

# Relative imports below need the top-level project folder on sys.path;
# it is taken to be three levels above the current working directory.
directory = Path(os.getcwd()).absolute()
sys.path.append(str(directory.parent.parent.parent))

# Project-local environment and action helpers (resolved through the path entry above).
from AI2Thor.env_new import AI2ThorEnv
from AI2Thor.object_actions import get_all_available_actions, inverse_act2text

# The OpenAI API key is read from a JSON file in the home folder, which must
# look like {"my_openai_api_key": "INSERT API HERE"}
with open(os.path.join(os.path.expanduser("~"), "openai_key.json")) as json_file:
    key = json.load(json_file)
api_key = key["my_openai_api_key"]

# HTTP headers reused for every request sent to the OpenAI API.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

# Empty results table; rows are added with append_row().
df = pd.DataFrame(columns=['Step', 'Action', 'Success', 'Coverage', 'Transport Rate'])

def append_row(df, step, action, success, coverage, transport_rate):
    """Return a new frame consisting of `df` plus one result row.

        df: pd.DataFrame
            Results table with the columns
            ['Step', 'Action', 'Success', 'Coverage', 'Transport Rate']
        step, action, success, coverage, transport_rate:
            Values stored in the matching columns of the new row

    `df` itself is not modified; the caller must keep the returned frame.
    """
    row = pd.DataFrame([[step, action, success, coverage, transport_rate]], columns=['Step', 'Action', 'Success', 'Coverage', 'Transport Rate'])
    # ignore_index renumbers the rows 0..n-1; without it every appended row
    # keeps the label 0 and the index ends up full of duplicates.
    df = pd.concat([df, row], ignore_index=True)
    return df


# initialize environment parameters
class Config:
    """Plain container for the settings handed to AI2ThorEnv.

    Only `temperature` is read directly by code visible in this notebook (it is
    sent with each OpenAI request); how the environment interprets the other
    fields is not visible here.
    """

    def __init__(self):
        # --- scene / episode ---
        self.num_agents = 2
        self.scene = "FloorPlan1"
        self.scene_name = "FloorPlan1"
        self.horizon = 30
        self.forceAction = False

        # --- language model ---
        # NOTE(review): the request payload built later in this notebook
        # hard-codes its own model name rather than reading `model`.
        self.model = "gpt-4"
        self.temperature = 0.7
        self.use_langchain = False
        self.use_strict_format = True

        # --- summarisers / failure feedback ---
        self.use_obs_summariser = False
        self.use_act_summariser = False
        self.use_action_failure = True

        # --- subtasks, messages and planning ---
        self.use_shared_subtask = True
        self.use_separate_subtask = False
        self.use_future_message = True
        self.use_plan = True

        # --- memory ---
        self.use_memory = True
        self.use_shared_memory = True
        self.use_separate_memory = False


config = Config()

# Build the AI2Thor environment from the settings in `config`.
env = AI2ThorEnv(config)
# Reset the environment with a new natural-language task; the value returned
# by reset() is kept in `d` (its structure is not visible in this notebook).
d = env.reset(task="place a tomato, lettuce and bread in the fridge")
# Example of stepping the environment by hand (left disabled):
# d = env.step(['Move(Ahead)'])


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


__________________________________________________
Pre-initializing environment with closest task: 1_put_bread_lettuce_tomato_fridge
__________________________________________________
__________________________________________________
Subtasks to complete:
NavigateTo(Bread)
PickupObject(Bread)
NavigateTo(Fridge, Bread)
PutObject(Fridge, Bread)
NavigateTo(Tomato)
PickupObject(Tomato)
NavigateTo(Fridge, Tomato)
PutObject(Fridge, Tomato)
NavigateTo(Lettuce)
PickupObject(Lettuce)
NavigateTo(Fridge, Lettuce)
PutObject(Fridge, Lettuce)
OpenObject(Fridge)
CloseObject(Fridge)
__________________________________________________
Preinitializing the environment
__________________________________________________


In [5]:
class CoELA_Agent(LLM):
    """Prompt builder for one agent of the CoELA baseline.

    Each instance is bound to one agent slot of the shared environment and
    assembles the two prompts used per step: a communication prompt (what to
    tell the partner) and a planner prompt (which available action to take).
    Requests are sent through `get_gpt_response`, inherited from `LLM`.

    NOTE(review): `LLM` is defined in a later cell of this notebook, so on a
    fresh kernel that cell has to be executed before this one.
    """

    def __init__(self, env, agent_id:int, agent_name:str, friend_name:str, task:str, total_num_steps:int):
        """
            env:
                Environment shared by the agents. This class reads its
                `all_obs`, `inventory`, `step_num`, `action_history`,
                `step_nums_history`, `action_success` and `checker` attributes.
            agent_id: int
                Index of this agent inside the environment's per-agent lists
            agent_name: str
                Name used in the prompts and as key into the action histories
            friend_name: str
                Name of the partner agent
            task: str
                Natural-language goal shared by both agents
            total_num_steps: int
                Step budget quoted in the prompts
        """
        super().__init__()
        self.env = env
        self.agent_id = agent_id
        self.name = agent_name
        self.friend_name = friend_name
        self.task = task
        # Fixed opening exchange: "Alice" greets and "Bob" replies, whichever
        # of the two this instance represents.
        self.dialogues = ["Hi, I’ll let you know if I find any target objects and containers, finish any subgoals, and ask for your help when necessary.",
            "Thanks! I’ll let you know if I find any target objects and containers, finish any subgoals, and ask for your help when necessary."]
        self.dialogues_speaker = ["Alice", "Bob"]
        self.total_num_steps = total_num_steps

    def create_agent_planner_prompt(self) -> str:
        """System prompt asking the model to pick the next action."""
        # The step budget comes from total_num_steps so that the system prompt
        # and the progress line always quote the same number.
        agent_prompt=f"""I’m {self.name}. My friend {self.friend_name} and I want to complete a task together within {self.total_num_steps} steps. I can hold one object at a time. Given an image from my point of view of the environment, our shared goal, dialogue history, my progress, and previous actions, please help me choose the best available action to achieve the goal as soon as possible. All objects are denoted as "<object_name>_<object_id>", such as "Table_2". Actions take several steps to finish. Your output should just be the action name."""
        return agent_prompt

    def create_agent_comm_prompt(self) -> str:
        """System prompt asking the model to write a message for the partner."""
        agent_prompt=f"""I’m {self.name}. My friend {self.friend_name} and I want to complete a task together within {self.total_num_steps} steps. I can hold one object at a time. Given an image from my point of view of the environment, our shared goal, dialogue history, my progress, and previous actions, please help me generate a short message to send to {self.friend_name} to help us achieve the goal as soon as possible. All objects are denoted as "<object_name>_<object_id>", such as "Table_2". Actions take several steps to finish."""
        return agent_prompt

    def add_goal_desc(self) -> str:
        """Start a user prompt with the goal line."""
        agent_instruction=f"\nGoal: {self.task}\n"
        return agent_instruction

    def add_available_actions_prompt(self, prompt:str, message:str) -> str:
        """
            prompt: str
                Prompt constructed so far
            message: str
                Candidate message to the partner. When it is not None it is
                offered as option 1 ("Send a message: ...") ahead of the
                environment actions.
        """
        agent_actions_prompt="Available actions: (You can only choose the action in the list)\n"
        options = []
        if message is not None:
            options.append(f"Send a message: {message}")

        inventory = self._convert_inventory()
        options.extend(get_all_available_actions(self.env.all_obs[self.agent_id], inventory))
        # Options are numbered from 1 in the order they are offered.
        agent_actions_prompt += "".join(f"{number}. {option}\n" for number, option in enumerate(options, start=1))
        return prompt+agent_actions_prompt

    def add_prev_actions_prompt(self, prompt:str) -> str:
        """
            prompt: str
                Prompt constructed so far

            The actions, the step at which each was taken and whether it
            succeeded are read from the environment's `action_history`,
            `step_nums_history` and `action_success`, keyed by this agent's name.
                Example actions: ["NavigateTo(Bread_1)", "PickupObject(Bread_1)"]
                Example steps: [1, 13]
        """
        agent_prev_actions_prompt="Previous actions: "
        action_history = self.env.action_history[self.name]
        if len(action_history) == 0:
            agent_prev_actions_prompt += "No actions taken yet.\n"
            return prompt+agent_prev_actions_prompt
        step_nums = self.env.step_nums_history[self.name]
        assert len(action_history) == len(step_nums)
        successes = self.env.action_success[self.name]
        entries = []
        for i, action in enumerate(action_history):
            act_text = inverse_act2text(action)
            success_str = "was successful" if successes[i] else "failed"
            entries.append(f"{act_text} at step {step_nums[i]} which {success_str}")
        agent_prev_actions_prompt += ", ".join(entries) + "\n"
        return prompt+agent_prev_actions_prompt

    def add_dialogue_history_prompt(self, prompt:str) -> str:
        """
            prompt: str
                Prompt constructed so far

            Appends every entry of `self.dialogues` prefixed with the matching
            entry of `self.dialogues_speaker`, one line per utterance.
        """
        agent_dialogue_history_prompt="Dialogue history:\n"
        for speaker, dialogue in zip(self.dialogues_speaker, self.dialogues):
            agent_dialogue_history_prompt += f"{speaker}: \"{dialogue}\"\n"
        return prompt+agent_dialogue_history_prompt

    def add_progress_desc(self, prompt:str) -> str:
        """
            prompt: str
                Prompt constructed so far

            Appends the number of steps taken, what the agent is holding and
            the subtasks the environment's checker reports as completed.
        """
        inventory = self._convert_inventory()
        progress_prompt = f"Progress: I have taken {self.env.step_num[self.agent_id]}/{self.total_num_steps} steps. "
        object_hold_str = "I’m holding nothing. " if len(inventory) == 0 else f"I’m holding {', '.join(inventory)}. "
        progress_prompt += object_hold_str
        completed = self.env.checker.subtasks_completed_numerated
        if len(completed) > 0:
            # assumes the checker's entries are printable subtask labels — TODO confirm
            completed_text = ", ".join(str(subtask) for subtask in completed)
            progress_prompt += f"We have completed the following subtasks: {completed_text}. "
        else:
            progress_prompt += "We haven't made any progress towards our goal yet. "
        return prompt + progress_prompt + "\n"

    def _build_user_prompt(self, message:str) -> str:
        """Assemble the sections shared by the planner and comm user prompts:
        goal, progress, dialogue history, previous actions, available actions."""
        prompt = self.add_goal_desc()
        prompt = self.add_progress_desc(prompt)
        prompt = self.add_dialogue_history_prompt(prompt)
        prompt = self.add_prev_actions_prompt(prompt)
        prompt = self.add_available_actions_prompt(prompt, message)
        return prompt

    def prepare_planner_prompt(self,
                            message:str) -> tuple:
        """Build the prompts used to choose the next action.

            message: str
                Candidate message offered as the "Send a message" option
                (None to omit that option)

            Returns (system_prompt, user_prompt).
        """
        system_prompt = self.create_agent_planner_prompt()
        prompt = self._build_user_prompt(message) + "Answer: Let’s think step by step.\n"
        return system_prompt, prompt

    def prepare_comm_prompt(self,
                            message:str) -> tuple:
        """Build the prompts used to generate a message for the partner.

            message: str
                Forwarded to the available-actions section (None to omit the
                "Send a message" option)

            Returns (system_prompt, user_prompt).
        """
        system_prompt = self.create_agent_comm_prompt()
        prompt = self._build_user_prompt(message) + "Note: The generated message should be accurate, helpful and brief. Do not generate repetitive messages.\n"
        return system_prompt, prompt

    def _convert_inventory(self):
        """Return what this agent holds as a list: [] when the environment
        reports 'nothing', otherwise a one-element list."""
        inventory = self.env.inventory[self.agent_id]
        if inventory == 'nothing':
            return []
        return [inventory]


NameError: name 'LLM' is not defined

In [None]:
def encode_image(image_path:str):
    """Read the file at `image_path` and return its bytes as a base64 text string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

class LLM:
    """Thin client for the OpenAI chat-completions endpoint.

    Meant to be subclassed: `prepare_payload` reads `self.agent_id`, which this
    base class does not set, and prefers `self.env` when the subclass provides it.
    """

    def __init__(self):
        pass

    def prepare_payload(self, system_prompt, user_prompt, model="gpt-4-vision-preview", max_tokens=1000):
        """Build the JSON body of one chat-completions request.

        The payload consists of
        * the system prompt (which is constant)
        * the user prompt (which changes based on the state)
        * the image from this agent's perspective

            system_prompt: str
            user_prompt: str
            model: str
                Model name sent to the API
            max_tokens: int
                Upper bound on the length of the reply

        Returns the payload as a dict.
        """
        # Use the environment the agent was constructed with; fall back to the
        # notebook-level `env` only when the instance has none.
        frame_env = getattr(self, "env", None)
        if frame_env is None:
            frame_env = env

        image_path = frame_env.get_frame(self.agent_id)
        base64_image = encode_image(image_path)
        payload = {
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": [
                        {"type": "text", "text": system_prompt},
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                        },
                    ],
                }
            ],
            "max_tokens": max_tokens,
            "temperature": config.temperature,
        }
        return payload

    def get_gpt_response(self, system_prompt, user_prompt, timeout=120):
        """Send the prompts to the chat-completions endpoint.

            timeout: float
                Seconds to wait for the server before giving up; without a
                timeout a stalled connection would block the notebook forever.

        Returns the raw `requests` response; the HTTP status is not checked here.
        """
        payload = self.prepare_payload(system_prompt, user_prompt)
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        return response


def get_action(response):
    """Extract a dictionary from the text of a chat-completions reply.

        response:
            Object whose .json() returns the API reply; the text is read from
            ["choices"][0]["message"]["content"]

    Returns the parsed dict spanning the first "{" to the last "}" of the
    text, or None when the text contains no such span. Text between the braces
    that is not valid JSON makes json.loads raise.
    """
    response_dict = response.json()
    output = response_dict["choices"][0]["message"]["content"]
    # DOTALL lets "." cross newlines, so a dictionary that the model spreads
    # over several lines is still found.
    dict_match = re.search(r'\{.*\}', output, re.DOTALL)

    if dict_match is None:
        return None
    # Convert the matched string to a dict.
    return json.loads(dict_match.group())

In [None]:
# Both agents pursue the same goal; keeping the text in one place stops the
# two constructor calls from drifting apart.
task_description = "place a tomato, lettuce and bread in the fridge"
agent1 = CoELA_Agent(env, 0, "Alice", "Bob", task_description, 3000)
agent2 = CoELA_Agent(env, 1, "Bob", "Alice", task_description, 3000)
agents_list = [agent1, agent2]

# NOTE(review): within this cell `actions` is collected but never passed to the
# environment, and `message` is never added to the agents' dialogue history, so
# every iteration queries the model about the same state. Confirm how the
# chosen actions are meant to be applied before relying on this loop.
# The number of rounds follows the configured horizon instead of a literal.
for i in range(config.horizon):
    actions = []
    # get actions for all agents
    for agent in agents_list:
        # 1) ask the model for a message this agent could send to its partner
        agent_comm_sys_prompt, agent_comm_user_prompt = agent.prepare_comm_prompt(None)
        response = agent.get_gpt_response(agent_comm_sys_prompt, agent_comm_user_prompt)
        message = response.json()['choices'][0]['message']['content']

        # 2) ask the model to choose among the available actions, with sending
        #    that message offered as one of the options
        agent_planner_sys_prompt, agent_planner_user_prompt = agent.prepare_planner_prompt(message)
        response = agent.get_gpt_response(agent_planner_sys_prompt, agent_planner_user_prompt)
        action = response.json()['choices'][0]['message']['content']
        actions.append(action)



I’m Alice. My friend Bob and I want to complete a task together within 3000 steps. I can hold one object at a time. Given an image from my point of view of the environment, our shared goal, dialogue history, my progress, and previous actions, please help me generate a short message to send to Bob to help us achieve the goal as soon as possible. All objects are denoted as "<object_name>_<object_id>", such as "Table_2". Actions take several steps to finish.

Goal: place a tomato, lettuce and bread in the fridge
Progress: I have taken 0/3000. I’m holding nothing. We haven't made any progress towards our goal yet. 
Dialogue history:
Alice: "Hi, I’ll let you know if I find any target objects and containers, finish any subgoals, and ask for your help when necessary."
Bob: "Thanks! I’ll let you know if I find any target objects and containers, finish any subgoals, and ask for your help when necessary."
Previous actions: No actions taken yet.
Available actions: (You can only choose the action 

In [None]:
# Build Alice's communication prompt in this cell so that it does not depend on
# variables left over from a cell that is no longer part of the notebook.
agent1_c_sp, agent1_c_up = agent1.prepare_comm_prompt(None)
response = agent1.get_gpt_response(agent1_c_sp, agent1_c_up)
response.json()['choices'][0]['message']['content']

"I see a tomato on the counter to the right and lettuce on the island in front of me. I'll navigate to the tomato and lettuce to pick them up. If you find the bread, please pick it up and let's meet at the fridge to store them."

In [None]:
# Keep the generated text so it can be passed to the planner prompt below.
message = response.json()['choices'][0]['message']['content']

In [None]:
# Build Alice's planner prompts, offering `message` as the "Send a message"
# option, and print both the system and the user prompt for inspection.
agent1_p_sp, agent1_p_up = agent1.prepare_planner_prompt(message=message)
print(agent1_p_sp)
print(agent1_p_up)

I’m Alice. My friend Bob and I want to complete a task together within 3000 steps. I can hold one object at a time. Given an image from my point of view of the environment, our shared goal, dialogue history, my progress, and previous actions, please help me choose the best available action to achieve the goal as soon as possible. All objects are denoted as "<object_name>_<object_id>", such as "Table_2". Actions take several steps to finish. Your output should just be the action name.

Goal: place a tomato, lettuce and bread in the fridge
Progress: I have taken 0/3000. I’m holding nothing. We haven't made any progress towards our goal yet. 
Dialogue history:
Alice: "Hi, I’ll let you know if I find any target objects and containers, finish any subgoals, and ask for your help when necessary."
Bob: "Thanks! I’ll let you know if I find any target objects and containers, finish any subgoals, and ask for your help when necessary."
Previous actions: No actions taken yet.
Available actions: (Yo

In [None]:
# Send the planner prompts and display the text of the model's reply.
response = agent1.get_gpt_response(agent1_p_sp, agent1_p_up)
response.json()['choices'][0]['message']['content']

'NavigateTo(Tomato_1)'