In [None]:
import ast
import json
import os
import time

import requests
from tqdm import tqdm

# Read the API key from the OPENAI_API_KEY environment variable so it does not
# have to be written into the notebook. The original placeholder is kept as the
# fallback, so editing the literal in place still works as before.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "<OPEN_AI_KEY>")

# HTTP headers sent with every chat-completions request.
headers = {
    'Authorization': f'Bearer {OPENAI_API_KEY}',
    'Content-Type': 'application/json',
}
MODEL_TYPE = "gpt-3.5-turbo-0613" # gpt-4

# Benchmark records for the Open-World Game domain. The file is opened in a
# context manager (so the handle is closed) and decoded explicitly as UTF-8.
META_DATA_PATH = "/PCA-EVAL/pca-eval/data/v1.0/Open-World Game/meta_data.json"
with open(META_DATA_PATH, "r", encoding="utf-8") as meta_file:
    examples = json.load(meta_file)


In [2]:
def format_choices(choices):
    """Render a list of options as a single line of letter-labelled choices.

    Example: ['Phoenix', 'Baton Rouge', 'Honolulu', 'Cheyenne']
    -> "(A) Phoenix (B) Baton Rouge (C) Honolulu (D) Cheyenne"

    Labels are consecutive characters starting at 'A'. Entries are separated by
    a single space and no punctuation is appended after an option.
    """
    labelled = []
    for offset, option in enumerate(choices):
        letter = chr(ord('A') + offset)
        labelled.append(f"({letter}) {option}")
    return " ".join(labelled)

In [3]:
# Task instructions sent as the first user message of every conversation.
# NOTE(review): the wording below (typos included) is the exact text the model
# receives; changing it changes the model input, so it is left untouched here.
start_prompt = """You are a professional multimodal embodied reasoner. Your are assisted with multiple visual api which can anwser your questions about an  image. Your job is to select the best action to anwser my question based on an image.  Note that you can't directly see the image but through the anwser of API. I will first give you the description of valid APIs and then give you the question. You can gather information from the api when giving the anwser. Note that you can craft an item only if you have all needed materials."""


In [4]:
# Stub-style description of the two APIs the model may ask for; it is sent
# verbatim as a user message. The inner docstring quotes are backslash-escaped
# so they do not terminate this triple-quoted string.
api_prompt = """#API Description
def list_nearby_mobs_in_minecraft():
    \"""
    Lists nearby mobs in Minecraft.
    :return: list of nearby mobs, e.g. ['creeper', 'pig']
    \"""
    pass

def list_inventory_information():
    \"""
    Lists inventory information of the player in Minecraft.
    :return: list of inventory information with number, e.g. [('diamond', 64), ('iron', 32)]
    \"""
    pass"""

In [5]:
# Show the API description as rendered text (escaped quotes appear as plain triple quotes).
print(api_prompt)

#API Description
def list_nearby_mobs_in_minecraft():
    """
    Lists nearby mobs in Minecraft.
    :return: list of nearby mobs, e.g. ['creeper', 'pig']
    """
    pass

def list_inventory_information():
    """
    Lists inventory information of the player in Minecraft.
    :return: list of inventory information with number, e.g. [('diamond', 64), ('iron', 32)]
    """
    pass


# New section

In [13]:
def _request_chat_message(request, timeout=120, retry_delay=1.0, max_retry_delay=30.0):
    """POST ``request`` to the chat-completions endpoint until a message comes back.

    Retries without limit on network errors, timeouts and responses that do not
    contain ``choices[0].message``, sleeping between attempts with exponential
    backoff so a failing or rate-limited endpoint is not hit in a tight loop.

    :param request: JSON-serialisable chat-completions payload.
    :param timeout: seconds to wait for each HTTP response before retrying.
    :param retry_delay: pause in seconds before the first retry.
    :param max_retry_delay: upper bound for the backoff pause.
    :return: the ``choices[0]['message']`` dict of the first usable response.
    """
    delay = retry_delay
    while True:
        # Reset per attempt so the handler never reads an unbound or stale response.
        response = None
        try:
            response = requests.post('https://api.openai.com/v1/chat/completions',
                                     headers=headers,
                                     data=json.dumps(request),
                                     timeout=timeout)
            return json.loads(response.text)['choices'][0]['message']
        except Exception as error:
            # Print the API's error body when one exists, otherwise the exception itself.
            print(response.text if response is not None else repr(error))
            time.sleep(delay)
            delay = min(delay * 2, max_retry_delay)


def chat_minecraft(example_json,model="gpt-4"):
    """Run one multi-turn conversation for a benchmark example.

    The model receives the task instructions, the API description and the
    question with its lettered actions. Whenever a reply mentions an API name
    (plain substring match) that has not been answered yet, the cached result
    stored with the example is sent back as the next user message. Each API is
    answered at most once; the conversation stops after the first reply that
    triggers no new API answer.

    :param example_json: example record; reads 'question', 'actions' and the
        'list_nearby_mobs' / 'list_items_in_inventory' entries of
        'api_cached_result'.
    :param model: chat-completions model name.
    :return: the request dict whose 'messages' list holds the whole transcript.
        A user message carrying the API results is appended after every model
        reply (with empty content on the last round), so the model's final
        answer is at ``messages[-2]``.
    """
    goal_prompt = example_json['question']
    actions_str = format_choices(example_json['actions'])

    # Name used in the prompt -> cached result stored with the example.
    # Insertion order fixes the order of the answers in the reply text.
    cached_results = {
        "list_nearby_mobs_in_minecraft": example_json['api_cached_result']['list_nearby_mobs'],
        "list_inventory_information": example_json['api_cached_result']['list_items_in_inventory'],
    }

    request = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": start_prompt
            },
            {
                "role": "assistant",
                "content": "Sure, please provide me with the description of the valid APIs and your question."
            },
            {
                "role": "user",
                "content": api_prompt
            },
            {
                "role": "assistant",
                "content": "Thank you for providing the descriptions of the valid APIs. Please go ahead and ask your question so that I can assist you in selecting the best action based on the image."
            },
            {
                "role": "user",
                "content": "Question: You are a powerful game assistant in Minecraft. Your goal is to {}. Based on current condition, what is the best action to do next? {}".format(goal_prompt,actions_str)
            },
        ]
    }

    # APIs whose cached result has already been sent to the model.
    answered_apis = set()

    while True:
        model_response_json = _request_chat_message(request)

        print(model_response_json)

        request['messages'].append(model_response_json)

        # Treat missing/null content as empty text so the substring checks cannot raise.
        reply_text = model_response_json.get('content') or ""

        api_response = ""
        has_api_call = False
        for api_name, cached_value in cached_results.items():
            if api_name in reply_text and api_name not in answered_apis:
                api_response += api_name + "() = " + str(cached_value) + "\n"
                answered_apis.add(api_name)
                has_api_call = True

        # Appended even when empty: later cells rely on the final answer being messages[-2].
        request['messages'].append({
            "role": "user",
            "content": api_response
        })

        if not has_api_call:
            break

    return request

In [7]:
# Inspect the first benchmark record to see which fields an example carries.
examples[0]

{'version': '1.0',
 'domain': 'Open-World Game',
 'index': 0,
 'image': 'minecraft_0.png',
 'question': 'Harvest wool',
 'actions': ['find sheep',
  'shear sheep',
  'craft shears',
  'find iron ore',
  'craft iron ingot'],
 'answer_index': 3,
 'reason': "To get wool, you need to find a sheep, and use shears to get the wool. There is sheep nearby, but you don't have shears, so you need to craft shears first. To craft shears, you need two iron ingots. You don't have iron ingot, so you need to find iron ore first to craft iron ingot.",
 'key_concept': ['Sheep nearby', 'No shears', 'No iron ingot'],
 'api_cached_result': {'list_nearby_mobs': ['sheep'],
  'list_items_in_inventory': []}}

In [8]:
# Accumulates the transcript dict returned by chat_minecraft for each example.
model_anwsers = []

In [9]:
# Check how many transcripts have been collected so far.
len(model_anwsers)

0

In [None]:
# Query the model for the first 100 examples. The loop starts after the
# transcripts already stored in model_anwsers, so re-running this cell (for
# instance after an interruption) resumes where it stopped instead of appending
# duplicates that would misalign model_anwsers with examples.
pending_examples = examples[:100][len(model_anwsers):]
for example in tqdm(pending_examples):
    model_anwsers.append(chat_minecraft(example, MODEL_TYPE))

In [None]:
# Inspect the full transcript recorded for the first example.
model_anwsers[0]

In [None]:
# Write the collected transcripts to disk as JSON indented by four spaces.
with open("minecraft_v1_gpt3.5_anwser.json", "w") as transcript_file:
    json.dump(model_anwsers, transcript_file, indent=4)

In [None]:
# The model's final reply is the second-to-last message: chat_minecraft appends
# a user message after every model reply, including the last one.
model_anwsers[0]['messages'][-2]['content']

"Based on the information provided by the APIs, the current condition is as follows:\n\n`list_nearby_mobs_in_minecraft() = ['sheep']`\n\n`list_inventory_information() = []`\n\nIn this situation, since there are nearby sheep but the inventory is empty, the best action to harvest wool would be to shear the sheep (Option B). \n\nPlease note that crafting shears or finding iron ore is not necessary in this case as there are already nearby sheep available."

In [None]:
def chat_parse_anwser(example_json,model_anwser,model="gpt-3.5-turbo-0613"):
    """Ask a chat model which lettered option a free-text answer implies.

    The example's actions are formatted as lettered options and sent together
    with ``model_anwser``; the model is instructed to reply with JSON such as
    {"model_output":"A"}, or {"model_output":"None"} when no option matches.

    The request is retried without limit, with exponential backoff, on network
    errors, timeouts and responses that lack ``choices[0].message``.

    :param example_json: example record; only 'actions' is read.
    :param model_anwser: free-text answer to be mapped to an option letter.
    :param model: chat-completions model name used for parsing.
    :return: the assistant message dict of the response, e.g.
        {'role': 'assistant', 'content': '{"model_output":"B"}'}.
    """
    actions_str = format_choices(example_json['actions'])
    request = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "You are going to help me parse the model output into the options. I will give you the option list and the model output, you need to anwser which option does the model output imply and respond in json format like {\"model_output\":\"A\"} , if the anwser does not match with any option, or the modal can't make any option right now, just output {\"model_output\":\"None\"}. "},
            {"role": "assistant", "content": "Sure, I will output the correct json return based on model's output."},
            {"role": "user", "content": "Options:%s \n Model Output:%s"%(actions_str,model_anwser)},
        ]
    }

    retry_delay = 1.0
    while True:
        # Reset per attempt so the handler never reads a stale response.
        response = None
        try:
            response = requests.post('https://api.openai.com/v1/chat/completions',
                                     headers=headers,
                                     data=json.dumps(request),
                                     timeout=120)
            # Decode inside the try: an error payload has no 'choices' entry and
            # must lead to a retry instead of an exception in the caller's loop.
            return json.loads(response.text)['choices'][0]['message']
        except Exception as error:
            # Print the API's error body when one exists, otherwise the exception itself.
            print(response.text if response is not None else repr(error))
            time.sleep(retry_delay)
            retry_delay = min(retry_delay * 2, 30.0)

In [None]:
# Try the parser on the first example's final model reply.
chat_parse_anwser(examples[0],model_anwsers[0]['messages'][-2]['content'])

{'role': 'assistant', 'content': '{"model_output":"B"}'}

In [None]:
# Accumulates one evaluation record per example (model reply, parsed choice, ground truth).
parsed_anwser=[]

In [None]:
# Check how many transcripts are available before parsing them.
len(model_anwsers)

100

In [None]:
# Check how many evaluation records have been built so far.
len(parsed_anwser)

0

In [None]:
# Build one evaluation record per (example, transcript) pair. The loop starts
# after the records already stored in parsed_anwser, so re-running this cell
# resumes instead of appending duplicate records.
example_transcript_pairs = list(zip(examples[:100], model_anwsers[:100]))
for example, transcript in example_transcript_pairs[len(parsed_anwser):]:
    # The final model reply is messages[-2]; the last message is the user turn
    # that chat_minecraft appends after every model reply.
    model_anwser = transcript['messages'][-2]['content']
    parsed_result = chat_parse_anwser(example, model_anwser)

    parsed_anwser.append({
        "model_anwser": model_anwser,
        "parsed_result": parsed_result,
        "action_list": format_choices(example['actions']),
        # Ground-truth index converted to its option letter (0 -> 'A').
        "anwser": chr(ord('A') + example['answer_index']),
        "key_concept": example['key_concept'],
        "reason": example['reason'],
    })


In [None]:
# Write the evaluation records to disk as JSON indented by four spaces.
with open("minecraft_v1_gpt3.5_anwser_parsed.json", "w") as parsed_file:
    json.dump(parsed_anwser, parsed_file, indent=4)

In [None]:
import re

def extract_substrings(s):
    """Return every ``{...}`` span found in ``s``, in order of appearance.

    Matching is non-greedy, so each span ends at the first closing brace after
    its opening brace (nested braces are not balanced). ``re.DOTALL`` lets a
    span cross line breaks, so a pretty-printed JSON object written over
    several lines is still found.

    :param s: text to search.
    :return: list of matched substrings, braces included; empty if none.
    """
    return re.findall(r'\{.*?\}', s, flags=re.DOTALL)

In [None]:
def _load_parsed_dict(dict_text):
    """Decode a ``{...}`` snippet into a Python object without executing it.

    Strict JSON is tried first. Python-literal syntax (e.g. single-quoted
    strings) is accepted as a fallback, so plain literals that the earlier
    ``eval`` call could read still decode, while arbitrary expressions in
    model-generated text are never run.
    """
    try:
        return json.loads(dict_text)
    except ValueError:
        return ast.literal_eval(dict_text)


# Count the examples whose parsed option letter equals the ground-truth letter,
# printing one "index | truth | parsed | correct" row per example.
rough_correct = 0
for index, record in enumerate(parsed_anwser):
    raw_content = record['parsed_result']['content']
    candidates = extract_substrings(raw_content)
    try:
        parsed = _load_parsed_dict(candidates[0])
        is_correct = record['anwser'] == parsed['model_output']
    except (IndexError, KeyError, TypeError, ValueError, SyntaxError):
        # No {...} span, undecodable text, or no 'model_output' entry:
        # show the raw reply (terminated by '$') and score the row as 0.
        print(index+1, record['anwser'], raw_content.replace("\n","")+"$", 0, sep=" | ")
        continue
    print(index+1, record['anwser'], parsed, int(is_correct), sep=" | ")
    if is_correct:
        rough_correct += 1

In [None]:
# Share of scored records whose parsed choice matched the ground truth. The
# denominator is the number of records actually scored rather than a fixed 100,
# so the value stays correct for any subset size; an empty run yields 0.0.
rough_action_acc = rough_correct / len(parsed_anwser) if parsed_anwser else 0.0

In [None]:
# Display the rough action accuracy.
rough_action_acc # 0.36 in our experiment

In [None]:
# The rough action accuracy may be lower than the real accuracy because some parsed results can be wrong; double-check them manually to obtain the final results.