In [None]:
from src.IVRECore import IVRE
from src.IVRE import SymbolicIVRE
from tqdm import trange
import matplotlib.pyplot as plt
import numpy as np
import argparse
import json
import os

In [None]:
import openai
import time

# Configure the OpenAI client from the environment. Indexing os.environ
# directly raises KeyError when OPENAI_API_KEY is not set.
openai.api_key = os.environ["OPENAI_API_KEY"]

# String labels "1" through "9"; position k in this list holds the label
# used for the object at index k.
alpha_index = [str(label) for label in range(1, 10)]

def chat(content, messages, role="user"):
    """Send one message to GPT-4 and record the reply in the conversation.

    ``content`` is appended to ``messages`` (the list is mutated in place)
    under ``role``, the whole conversation is sent to the chat-completion
    endpoint, and the reply is appended as an ``assistant`` message.

    Args:
        content: Text of the message to send.
        messages: Conversation so far, as a list of
            ``{"role": ..., "content": ...}`` dicts. Mutated in place.
        role: Role recorded for ``content``; defaults to ``"user"``.

    Returns:
        A ``(answer, messages)`` tuple: the reply text and the same
        ``messages`` list, now including both the request and the reply.
    """
    messages.append({"role": role, "content": content})
    while True:
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-4",
                max_tokens=15,
                messages=messages,
                temperature=0,
            )
            break
        except Exception as err:
            # Keep retrying indefinitely (best-effort, as before), but only on
            # ordinary errors: a bare ``except`` would also trap
            # KeyboardInterrupt/SystemExit and make the loop impossible to
            # stop. Surface the reason so a permanent failure is visible.
            print(f"Chat completion request failed ({err!r}); retrying in 1s")
            time.sleep(1)

    answer = completion.choices[0].message.content
    print(f'GPT-4: {answer}')
    messages.append({"role": "assistant", "content": answer})
    return answer, messages

def gpt_plan(obs, messages, env):
    """Ask GPT-4 for its current belief (and, from round 4 on, a trial).

    The latest row of ``env.history`` is rendered into a question, the reply
    is scanned for the labels in ``alpha_index``, and the matches are encoded
    into an action vector of length ``2 * sum(env.env.available_objects)``:
    the first half marks the believed activating objects, the second half
    marks the objects chosen for the next trial. Unmarked entries are ``-1``.

    Args:
        obs: Current observation; not used by this function.
        messages: Conversation history passed to and returned from ``chat``.
        env: Environment wrapper exposing ``history``, ``stepcnt`` and
            ``env`` (with ``stepcnt`` and ``available_objects``).

    Returns:
        A ``(messages, action)`` tuple where ``action`` is a NumPy array.
    """
    latest = env.history[env.env.stepcnt]

    # Every entry but the last flags an object on the machine; the last
    # entry is the machine's state.
    objs = []
    for position, flag in enumerate(latest[:-1]):
        if int(flag) == 1:
            objs.append(alpha_index[int(position)])
    if int(latest[-1]) == 1:
        state = "activated"
    else:
        state = "not activated"

    q = f'Round {env.stepcnt}:\n {", ".join(objs)} are on the machine, and the machine is {state}.\nQuestion: Which objects can activate the machine?\nAnswer: From 1-9, objects that can activate the machine are '
    print(q)
    answer, messages = chat(q, messages, role="user")

    action = [-1] * sum(env.env.available_objects) * 2
    # Belief half: any label appearing anywhere in the reply counts.
    for slot, label in enumerate(alpha_index):
        if label in answer:
            action[slot] = 1

    # Early rounds only report a belief; no self-designed trial yet.
    if env.stepcnt < 4:
        return messages, np.array(action)

    t = "Now design your own trial, you can put some of the objects on the machine to validate your hypothesis. Question: Which objects will you put on the machine?\nAnswer: From 1-9, I will put "
    print(t)
    answer, messages = chat(t, messages, role="user")

    # Trial half starts right after the belief half.
    offset = sum(env.env.available_objects)
    for slot, label in enumerate(alpha_index):
        if label in answer:
            action[slot + offset] = 1
    return messages, np.array(action)

In [None]:
def main(num_episodes=100):
    """Evaluate GPT-4 on IVRE episodes and save transcripts and scores.

    Each episode creates a fresh ``SymbolicIVRE(IVRE())`` environment, seeds
    the conversation with the task instructions, and lets ``gpt_plan`` act for
    at most ten steps. After a step that does not end the episode, a
    "Your answer is wrong." user message is appended to the conversation.

    Side effects:
        * writes every conversation to ``context.json``;
        * writes the per-episode total rewards and last step indices to
          ``reward_trial.txt``;
        * draws both series on the current matplotlib figure (not shown);
        * prints a running and a final average.

    Args:
        num_episodes: Number of episodes to run. Defaults to 100, the value
            that was previously hard-coded.
    """
    # Final-step reward that is counted as a solved episode.
    # NOTE(review): presumably the environment's success reward -- confirm.
    solved_reward = 20
    instructions = (
        "There are nine objects: 1, 2, 3, 4, 5, 6, 7, 8, 9 that can be put on a machine. "
        "Up to four of all the objects have a unique property that can activate the machine. "
        "Your goal is to find out all the objects that can activate the machine in ten rounds.\n"
        "Answer all the following questions in the format of \"1, 2, and 3.\"\n"
    )

    marks = []
    trials = []
    contexts = []
    sol_cnt = 0
    for i in trange(num_episodes):
        context = [{"role": "user", "content": instructions}]
        total_reward = 0
        env = SymbolicIVRE(IVRE())
        obs = env.reset()
        # The round limit stays fixed at ten because the instructions above
        # tell the model it has ten rounds.
        for step in range(10):
            context, action = gpt_plan(obs, context, env)
            obs, reward, done, info = env.step(action)
            total_reward += reward
            if done:
                if reward == solved_reward:
                    sol_cnt += 1
                break
            context.append({"role": "user", "content": "Your answer is wrong."})
        marks.append(total_reward)
        # Zero-based index of the last step executed in this episode.
        trials.append(step)
        contexts.append(context)
        print(f"Average mark at step {i}: {np.mean(marks)}, Sol_cnt: {sol_cnt}")

    # Save the conversations.
    with open('context.json', 'w', encoding="utf-8") as fout:
        json.dump(contexts, fout)

    episodes = range(len(marks))
    plt.plot(episodes, marks)
    plt.plot(episodes, trials)

    with open("reward_trial.txt", "w", encoding="utf-8") as output:
        output.write(str(marks))
        output.write("\n")
        output.write(str(trials))

    print("Average mark: ", np.mean(marks), "Sol_cnt: ", sol_cnt)

# Runs the whole experiment as soon as this code is executed; there is no
# __main__ guard, so importing this file also triggers the run.
main()