In [1]:
import json
import requests
import time
import pandas as pd
import os
import openai
# from openai import OpenAI

import backoff

In [2]:
!openai -V

openai 1.59.4


# Functions

In [None]:
def run_solver(domain_file, problem_file, solver, poll_interval=0.5, max_wait=None, request_timeout=60):
    """Solve a PDDL problem with the solver.planning.domains web service.

    Submits the domain/problem text to the ``/package/{solver}/solve``
    endpoint, then polls the result URL returned by the service until the job
    status is no longer ``'PENDING'``.

    Args:
        domain_file: PDDL domain definition, as text.
        problem_file: PDDL problem definition, as text.
        solver: Name of the solver package (e.g. ``"dual-bfws-ffparser"``).
        poll_interval: Seconds to wait between two polls of a pending job.
        max_wait: Optional upper bound, in seconds, on the total polling time.
            ``None`` (the default) polls until the job leaves ``'PENDING'``.
        request_timeout: Timeout in seconds passed to every HTTP request, so a
            stalled connection cannot block the notebook forever.

    Returns:
        The ``'result'`` entry of the final job response.

    Raises:
        TimeoutError: If ``max_wait`` is set and the job is still pending
            once that much time has elapsed.
    """
    base_url = "https://solver.planning.domains:5001"
    req_body = {"domain": domain_file, "problem": problem_file}

    # Send job request to solve endpoint; the response carries the relative
    # URL under which the job result can be queried.
    solve_response = requests.post(
        f"{base_url}/package/{solver}/solve",
        json=req_body,
        timeout=request_timeout,
    ).json()
    result_url = base_url + solve_response['result']

    deadline = None if max_wait is None else time.monotonic() + max_wait

    while True:
        # Parse each poll response once and return as soon as the job is done;
        # sleeping happens only when another poll is actually needed.
        job = requests.post(result_url, timeout=request_timeout).json()
        if job.get("status", "") != 'PENDING':
            return job['result']
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(f"Solver job still pending after {max_wait} seconds")
        time.sleep(poll_interval)

df = """(define (domain explore)
    (:requirements :strips :typing)
    (:types location direction)
    (:predicates
        (at ?loc - location)
        (connected ?loc1 - location ?loc2 - location ?dir - direction)
        (door-closed ?loc1 - location ?loc2 - location)
    )
    (:action open-door
        :parameters (?loc1 - location ?loc2 - location ?dir - direction)
        :precondition (and (connected ?loc1 ?loc2 ?dir) (door-closed ?loc1 ?loc2))
        :effect (not (door-closed ?loc1 ?loc2))
    )
    (:action move
        :parameters (?from - location ?to - location ?dir - direction)
        :precondition (and (connected ?from ?to ?dir) (not (door-closed ?from ?to)))
        :effect (and (not (at ?from)) (at ?to))
    )
)"""

pf = """(define (problem explore-instance)
    (:domain explore)
    (:objects
        kitchen - location
        patio - location
        backyard - location
        driveway - location
        street - location
        south north east west - direction
    )
    (:init
        (at driveway)
        (connected kitchen patio south)
        (connected patio kitchen north)
        (connected backyard driveway south)
        (connected driveway backyard north)
        (connected backyard street east)
        (door-closed patio backyard)
    )
    (:goal
        (at street)
    )
)"""

run_solver(df, pf, "dual-bfws-ffparser")

{'call': 'timeout 30 planutils run dual-bfws-ffparser -- domain problem plan',
 'output': {'plan': '(MOVE BACKYARD STREET EAST)\n'},
 'output_type': 'generic',
 'stderr': '',
 'stdout': ' --- OK.\n Match tree built with 5 nodes.\n\nPDDL problem description loaded: \n\tDomain: EXPLORE\n\tProblem: EXPLORE-INSTANCE\n\t#Actions: 5\n\t#Fluents: 5\nGoals found: 1\nGoals_Edges found: 1\nStarting search with 1-BFWS...\n--[1 / 0]--\n--[0 / 0]--\n--[0 / 1]--\nTotal time: 6.10001e-05\nNodes generated during search: 5\nNodes expanded during search: 1\nPlan found with cost: 1\nFast-BFS search completed in 6.10001e-05 secs\n'}

In [None]:
# Set the environment variable within the Python script
# os.environ["OPENAI_API_KEY"] = ""

# # Check if the environment variable is available
# print(os.getenv("OPENAI_API_KEY"))

In [5]:
def summarize_obs(obs):
    """Shorten a multi-line observation to its location and exits.

    A single-line observation is returned unchanged. Otherwise the result is
    the text of the first line up to its first ``". "`` separator, followed by
    ``". "`` and the whole second line; any further lines are dropped.
    """
    lines = obs.split('\n')

    # Nothing to condense when the observation is a single line.
    if len(lines) == 1:
        return obs

    # Keep only where you are (first sentence) and the location information
    # found on the second line.
    first_sentence = lines[0].split(". ")[0]
    return f"{first_sentence}. {lines[1]}"

In [6]:
from openai import OpenAI

client = OpenAI()

def run_gpt(prompt):
    """Ask gpt-4o for a PDDL domain/problem pair and return it.

    The prompt is sent as a single user message through the module-level
    ``client``. The reply is expected to be a JSON object with the keys
    ``"df"`` (domain file) and ``"pf"`` (problem file), optionally wrapped in
    a Markdown code fence.

    Args:
        prompt: Full prompt text to send to the model.

    Returns:
        A ``(df, pf)`` tuple taken from the decoded JSON reply.

    Raises:
        ValueError: If the decoded reply lacks ``"df"`` or ``"pf"``
            (``json.JSONDecodeError``, a ``ValueError`` subclass, is raised
            when the reply is not valid JSON).
    """
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{"role": "user", "content": prompt}],
        # max_completion_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    response_content = response.choices[0].message.content.strip()

    # Remove a surrounding Markdown fence. str.lstrip/rstrip treat their
    # argument as a set of characters, not a prefix/suffix, so the fence is
    # removed by slicing; this also copes with a bare ``` fence and with
    # whitespace after the closing fence.
    if response_content.startswith("```"):
        response_content = response_content[3:]
        if response_content[:4].lower() == "json":
            response_content = response_content[4:]
        if response_content.endswith("```"):
            response_content = response_content[:-3]
        response_content = response_content.strip()

    result = json.loads(response_content)

    df = result.get("df", None)
    pf = result.get("pf", None)

    if df is None or pf is None:
        raise ValueError("Missing 'df' or 'pf' in the response. Check the prompt or the model output.")

    return df, pf

In [7]:
prompt = """Please provide the output in JSON format, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'.
        The output format should be: {"df": "...", "pf": "..."}
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here are your current observations: Action: look around
You are in the kitchen. To the South you see a closed patio door. To the West you see a closed plain door. 

        Here are some valid actions you can take: ['close door to south', 'close door to west', 'move south', 'move west', 'open door to south', 'open door to west']
        You should generate df and pf strictly follow this valid actions. There are in total 2 actions, that should exactly be the following two:
        1. :action open-door
            :parameters (?loc1 - location ?loc2 - location ?dir - direction)
        2. :action move
            :parameters (?from - location ?to - location ?dir - direction)
        You should have a goal in the problem file like this: 
        (:goal 
            (at ?location)
        ) where location should be somewhere not visited"""

df, pf = run_gpt(prompt)
print(df)
print(pf)

(define (domain exploration)
    (:predicates
        (at ?loc - location)
        (door ?loc1 - location ?loc2 - location ?dir - direction)
        (open ?loc1 - location ?loc2 - location ?dir - direction)
        (visited ?loc - location)
    )

    (:action open-door
        :parameters (?loc1 - location ?loc2 - location ?dir - direction)
        :precondition (and (at ?loc1) (door ?loc1 ?loc2 ?dir) (not (open ?loc1 ?loc2 ?dir)))
        :effect (open ?loc1 ?loc2 ?dir)
    )

    (:action move
        :parameters (?from - location ?to - location ?dir - direction)
        :precondition (and (at ?from) (open ?from ?to ?dir))
        :effect (and (at ?to) (not (at ?from)) (visited ?to))
    )
)
(define (problem explore-house)
    (:domain exploration)
    (:objects
        kitchen - location
        patio - location
        west-room - location
        south - direction
        west - direction
    )
    (:init
        (at kitchen)
        (door kitchen patio south)
        (door kitch

In [8]:
def map_actions(action):
    """Translate a planner plan into environment action strings.

    The plan text is lower-cased, parentheses are removed, and each line is
    inspected: a line containing both "open" and "door" becomes
    ``open door to <last token>``, any other line containing "move" becomes
    ``move <last token>``, and all remaining lines are ignored.

    Returns:
        The list of mapped actions in plan order, or ``None`` when no line
        could be mapped.
    """
    cleaned = action.lower().translate(str.maketrans("", "", "()"))

    mapped = []
    for step in cleaned.split('\n'):
        # The last space-separated token of the step is used as the target.
        target = step.split(' ')[-1]
        if "open" in step and "door" in step:
            mapped.append(f'open door to {target}')
        elif "move" in step:
            mapped.append(f"move {target}")

    return mapped or None
    
map_actions("(OPEN-DOOR KITCHEN PLACEHOLDER_WEST WEST)\n(MOVE KITCHEN PLACEHOLDER_WEST WEST)\n(REACH-GOAL)\n")

['open door to west', 'move west']

In [None]:
# help with run above~

# Prompts

## Basic prompts

In [None]:
# simple room and direct enter
prompt = """
Please provide the output in JSON format, including the domain and problem PDDL files as 'df' and 'pf'.
Generate a PDDL domain file and a PDDL problem file for a CoinCollector game where:
- There are rooms, doors, and coins.
- Room should be connected with at lease one other room and connected rooms have one door in between.
- The goal is to move between rooms and collect all the coins
Please ensure the syntax is correct and remember to declare every room or other necessary objects.

Please generate the PDDL DF and PF in the following format. This is only infomation that you generate:
{"df": "...", "pf": "..."}
"""

In [4]:
# closed door
prompt = """
Please provide the output in JSON format, including the domain and problem PDDL files as 'df' and 'pf'.
Generate a PDDL domain file and a PDDL problem file for a CoinCollector game where:
- There are rooms, doors, and coins.
- Room should be connected with at lease one other room and connected rooms have one door in between, which could be closed initially.
- The goal is to move between rooms and collect all the coins
Please ensure the syntax is correct and remember to declare every room or other necessary objects.

Please generate the PDDL DF and PF in the following format. This is only infomation that you generate:
{"df": "...", "pf": "..."}
"""

In [18]:
# Explore all unexplored rooms (may need to go back)
prompt = """
Please provide the output in JSON format, including the domain and problem PDDL files as 'df' and 'pf'.
Generate a PDDL domain file and a PDDL problem file for a CoinCollector game where:
- There are rooms, doors, and coins.
- Room should be connected with at lease one other room and connected rooms should have one unique door in between, which could be closed initially.
- The goal is to move between rooms and collect all the coins
The situation is more complex with one room connected with two single room. You need to make sure exploring all unexplored rooms to collect all coins. 
Sometimes you may need to go back to the previously explored rooms in order to get into other unexplored rooms.
Please ensure the syntax is correct and remember to declare every room or other necessary objects.
Be careful when defining predicates and objects do not generate redundant ones or miss something.
One error message: undeclared predicate DOOR used in domain definition.

Please generate the PDDL DF and PF in the following format. This is only infomation that you generate:
{"df": "...", "pf": "..."}
"""

In [35]:
from textworld_express import TextWorldExpressEnv

# Create the TextWorldExpress environment; envStepLimit caps an episode at 100 steps.
env = TextWorldExpressEnv(envStepLimit=100)

# Load the "coin" game with NUM_LOCATIONS locations, no distractor items and doors enabled.
NUM_LOCATIONS = 11
env.load(gameName="coin", gameParams=f"numLocations={NUM_LOCATIONS},numDistractorItems=0,includeDoors=1,limitInventorySize=0")

# Reset with a fixed seed on the "train" fold and request the gold action sequence.
# `obs` and `infos` are interpolated into the prompt that is built right after this.
obs, infos = env.reset(seed=1, gameFold="train", generateGoldPath=True)

# Prompt from textworld_express
prompt = f"""
Please provide the output in JSON format, including the domain and problem PDDL files as 'df' and 'pf'.
Generate a PDDL domain file and a PDDL problem file for a CoinCollector game where:
- Task Description: {infos['taskDescription']}
- Your initial observation: {obs}
- Your potential actions: {infos['validActions']}

Some notes:
Please ensure the syntax is correct and remember to declare every room or other necessary objects.
Be careful when defining predicates and objects do not generate redundant ones or miss something.
One error message: undeclared predicate DOOR used in domain definition.
Make sure the actions are generalized instead of too specific, i.e. move to some place in some direction.
The initial observation is the observation that you are starting at. Remember to design the full map to solve.


Please generate the PDDL DF and PF in the following format. This is only infomation that you generate:
{{"df": "...", "pf": "..."}}
"""

In [36]:
print(prompt)


Please provide the output in JSON format, including the domain and problem PDDL files as 'df' and 'pf'.
Generate a PDDL domain file and a PDDL problem file for a CoinCollector game where:
- Task Description: Your task is to search the environment and find the coin.  Once you find the coin, take it.
- Your initial observation: You are in the kitchen. In one part of the room you see a stove. There is also an oven. You also see a fridge that is closed. In another part of the room you see a counter, that has nothing on it. In one part of the room you see a kitchen cupboard that is closed. There is also a cutlery drawer that is closed. You also see a trash can that is closed. In another part of the room you see a dishwasher that is closed. In one part of the room you see a dining chair, that has nothing on it. 
To the South you see a closed patio door. To the West you see a closed plain door. 
- Your potential actions: ['look around', 'close door to west', 'move west', 'open door to south'

In [37]:
# OpenAI new version

from openai import OpenAI
# Re-creates the module-level OpenAI client (it is also instantiated in other cells).
client = OpenAI()

# Send the current `prompt` as a single user message and ask for a JSON object reply.
response = client.chat.completions.create(
  model="gpt-4o",
  messages=[
    {
      "role": "user",
      "content": prompt
    }
  ],
  temperature=1,
  max_tokens=2048,
  top_p=1,
  frequency_penalty=0,
  presence_penalty=0,
  response_format={"type": "json_object"}
)

# Decode the reply text as JSON; json.loads raises if the text is not valid JSON.
# NOTE(review): with max_tokens=2048 a long reply could presumably be cut off before
# the JSON is complete -- confirm whether that needs handling.
response_content = response.choices[0].message.content
result = json.loads(response_content)

# Overwrites the module-level `df` / `pf`; a missing key raises KeyError here.
df = result['df']
pf = result['pf']
print(df)
print(pf)

(define (domain CoinCollector)
  (:requirements :strips :typing)
  (:types room object)
  (:predicates 
    (at ?r - room)
    (door ?d - object)
    (closed ?d - object)
    (in ?o - object ?r - room)
    (coin-collected)
  )

  (:action move
    :parameters (?from ?to - room ?d - object)
    :precondition (and (at ?from) (door ?d) (closed ?d))
    :effect (and (not (at ?from)) (at ?to))
  )

  (:action open-door
    :parameters (?d - object)
    :precondition (and (door ?d) (closed ?d))
    :effect (not (closed ?d))
  )

  (:action close-door
    :parameters (?d - object)
    :precondition (door ?d)
    :effect (closed ?d)
  )

  (:action look-around
    :parameters (?r - room)
    :precondition (at ?r)
    :effect (and (not (in coin ?r)) (coin-collected))
  )
)

(define (problem CoinCollector-problem)
  (:domain CoinCollector)
  (:objects 
    kitchen patio-room - room
    plain-door patio-door - object
    coin - object
  )
  (:init
    (at kitchen)
    (door plain-door)
    (door 

In [21]:
df = """(define (domain CoinCollector)
  (:requirements :strips :typing)
  (:types room door)
  (:predicates 
    (connected ?r1 - room ?r2 - room ?d - door)
    (closed ?d - door)
    (at ?r - room)
    (coin-at ?r - room)
    (collected ?r - room)
  )

  (:action move
    :parameters (?r1 - room ?r2 - room ?d - door)
    :precondition (and (connected ?r1 ?r2 ?d) (at ?r1) (not (closed ?d)))
    :effect (and (not (at ?r1)) (at ?r2))
  )

  (:action open-door
    :parameters (?d - door)
    :precondition (closed ?d)
    :effect (not (closed ?d))
  )

  (:action collect-coin
    :parameters (?r - room)
    :precondition (and (at ?r) (coin-at ?r))
    :effect (and (collected ?r) (not (coin-at ?r)))
  )
)"""

pf = """(define (problem CoinCollectorProblem)
  (:domain CoinCollector)

  (:objects
    room1 room2 room3 room4 - room
    door1 door2 - door
  )

  (:init
    (connected room1 room2 door1)
    (connected room2 room1 door1)
    (connected room2 room3 door2)
    (connected room2 room4 door2)
    (connected room3 room2 door2)
    (connected room4 room2 door2)
    (closed door1)
    (closed door2)
    (at room1)
    (coin-at room2)
    (coin-at room3)
    (coin-at room4)
  )

  (:goal (and
    (collected room2)
    (collected room3)
    (collected room4)
  ))
)"""

In [56]:
result = run_solver(df, pf, "dual-bfws-ffparser")
result

{'call': 'timeout 30 planutils run dual-bfws-ffparser -- domain problem plan',
 'output': {'plan': '(OPEN-DOOR DOOR1)\n(MOVE ROOM1 ROOM2 DOOR1)\n(COLLECT-COIN ROOM2)\n(OPEN-DOOR DOOR2)\n(MOVE ROOM2 ROOM3 DOOR2)\n(COLLECT-COIN ROOM3)\n(MOVE ROOM3 ROOM2 DOOR2)\n(MOVE ROOM2 ROOM4 DOOR2)\n(COLLECT-COIN ROOM4)\n'},
 'output_type': 'generic',
 'stderr': '',
 'stdout': ' --- OK.\n Match tree built with 11 nodes.\n\nPDDL problem description loaded: \n\tDomain: COINCOLLECTOR\n\tProblem: COINCOLLECTORPROBLEM\n\t#Actions: 11\n\t#Fluents: 14\nGoals found: 3\nGoals_Edges found: 3\nStarting search with 1-BFWS...\n--[3 / 0]--\n--[3 / 1]--\n--[3 / 2]--\n--[3 / 3]--\n--[2 / 0]--\n--[2 / 3]--\n--[1 / 0]--\n--[1 / 3]--\n--[0 / 0]--\n--[0 / 3]--\nTotal time: 0.000156\nNodes generated during search: 21\nNodes expanded during search: 14\nPlan found with cost: 9\nFast-BFS search completed in 0.000156 secs\n'}

## Baseline

In [24]:
from openai import OpenAI
import json

client = OpenAI()

# Function to directly get actions from GPT
def run_gpt_for_actions_baseline(prompt):
    """Ask gpt-4o directly for a list of environment actions.

    The prompt is sent as a single user message through the module-level
    ``client``. The reply is expected to be a JSON object with an
    ``"actions"`` key, optionally wrapped in a Markdown code fence.

    Args:
        prompt: Full prompt text to send to the model.

    Returns:
        The value stored under ``"actions"`` in the decoded JSON reply.

    Raises:
        ValueError: If the decoded reply has no ``"actions"`` entry
            (``json.JSONDecodeError``, a ``ValueError`` subclass, is raised
            when the reply is not valid JSON).
    """
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{"role": "user", "content": prompt}],
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    response_content = response.choices[0].message.content.strip()

    # Remove a surrounding Markdown fence. str.lstrip/rstrip treat their
    # argument as a set of characters, not a prefix/suffix, so the fence is
    # removed by slicing; this also copes with a bare ``` fence and with
    # whitespace after the closing fence.
    if response_content.startswith("```"):
        response_content = response_content[3:]
        if response_content[:4].lower() == "json":
            response_content = response_content[4:]
        if response_content.endswith("```"):
            response_content = response_content[:-3]
        response_content = response_content.strip()

    result = json.loads(response_content)

    actions = result.get("actions", None)

    if actions is None:
        raise ValueError("Missing 'actions' in the response. Check the prompt or the model output.")

    return actions

# Modified small loop to generate actions
def llm_to_actions_baseline(brief_obs, valid_actions, overall_memory=None, large_loop_error_message=None):
    """Build the baseline exploration prompt and return the model's actions.

    Args:
        brief_obs: Current (summarised) observation text.
        valid_actions: Actions the model may choose from; interpolated as-is.
        overall_memory: Optional history text; a fixed placeholder sentence is
            used when it is empty or ``None``.
        large_loop_error_message: Optional feedback from a failed attempt; a
            fixed placeholder sentence is used when it is empty or ``None``.

    Returns:
        Whatever ``run_gpt_for_actions_baseline`` returns for the prompt.
    """
    # Fall back to the placeholder sentences when no memory / error is given.
    memory_text = overall_memory or "No additional memory available."
    error_text = large_loop_error_message or "No errors or obstacles mentioned."

    prompt = f"""
        You are in an environment that you explore step by step. Based on your observations, generate a series of valid actions to progress in the environment.
        Here are your current observations: {brief_obs}
        Here are some valid actions you can take: {valid_actions}
        Your goal is to explore new locations and interact with the environment effectively. Ensure actions are logical and do not repeat unnecessarily.

        Additional context:
        {memory_text}

        If there are errors or obstacles, here is the message:
        {error_text}

        Provide the output in strict JSON format like this:
        {{
            "actions": ["action1", "action2", ...]
        }}
    """
    return run_gpt_for_actions_baseline(prompt)

# Fresh "coin" environment for the baseline run; envStepLimit caps an episode at 100 steps.
env = TextWorldExpressEnv(envStepLimit=100)

NUM_LOCATIONS = 11
env.load(gameName="coin", gameParams=f"numLocations={NUM_LOCATIONS},numDistractorItems=0,includeDoors=1,limitInventorySize=0")

# Same seed/fold as the retry branch of the loop below, which rebuilds this game and replays actions.
obs, infos = env.reset(seed=1, gameFold="train", generateGoldPath=True)

print("Observations: "+obs)
print("Gold path: " + str(env.getGoldActionSequence()))
print("Valid Actions: " + str(infos['validActions']))
print("taskDescription: " + str(infos['taskDescription']))
task_description = infos['taskDescription']
# Sorted copy of the initial valid actions without the two non-exploring commands.
# list.remove raises ValueError if either entry is absent.
valid_actions = sorted(infos['validActions'])
valid_actions.remove('look around')
valid_actions.remove('inventory')

# Upper bound on the number of outer steps of the loop below.
MAX_STEPS = 20

brief_obs = "Action: look around\n" + summarize_obs(obs)+'\n' # initial definition
print(brief_obs)

# Bookkeeping for the loop below.
# NOTE(review): df, pf, edit and entire_output are only assigned in this cell, and
# obs_queue / all_actions / overall_memory_dic are only appended to -- none of them is
# read here; confirm whether later cells rely on them.
action_queue = []
obs_queue = []
df = ""
pf = ""
all_actions = []
successful_actions = []
edit = False
end_game = False

# Running text history that is passed to the model as additional context.
overall_memory = brief_obs
overall_memory_dic = [{"type": "action", "content": 'look around'},\
                        {"type": "observation", "content": summarize_obs(obs)}]
entire_output = brief_obs

# Modified small loop logic
# Outer loop: at most MAX_STEPS rounds. In each round the model proposes a batch of
# actions; the batch is retried (up to 5 attempts) until every action in it executes
# without one of the known failure messages.
for step_id in range(0, MAX_STEPS):
    print(f"\n====Step {step_id}====")

    within_step_tries = 0
    action_passed = False
    large_loop_error_message = ""

    while within_step_tries < 5 and not action_passed:
        print(f'----Larger Loop No. {within_step_tries}----')
        print(f'Successful actions: {successful_actions}')
        within_step_tries += 1

        action_queue = []
        tem_action_queue = []

        # On a retry, rebuild the same seeded game and replay every action from the
        # batches that completed, so the failed batch's partial effects are discarded.
        if within_step_tries > 1:  # Reset environment for subsequent attempts
            env = TextWorldExpressEnv(envStepLimit=100)
            NUM_LOCATIONS = 11
            env.load(gameName="coin", gameParams=f"numLocations={NUM_LOCATIONS},numDistractorItems=0,includeDoors=1,limitInventorySize=0")
            obs, infos = env.reset(seed=1, gameFold="train", generateGoldPath=True)
            for successful_action in successful_actions:
                obs, reward, done, infos = env.step(successful_action)

        # Generate actions directly from GPT
        # NOTE(review): `valid_actions` is not reassigned anywhere in this loop, so the model
        # keeps seeing the list computed before the loop rather than infos['validActions']
        # of the current room -- confirm this is intended.
        # valid_actions = ["look around", "open door", "move", "take coin"]
        actions = llm_to_actions_baseline(summarize_obs(obs), valid_actions, overall_memory, large_loop_error_message)

        # if not actions:
        #     # end_game = True
        #     continue

        if actions:
            action_queue.extend(actions)
            tem_action_queue.extend(actions) # temporary action queue to put in successful_actions
            all_actions.extend(actions) # to detect duplicated
        else:
            # An empty batch still counts as one attempt (within_step_tries was already incremented).
            # end_game = True
            continue

        # action_queue = actions
        # tem_action_queue = actions[:]

        # Execute the batch one action at a time; stop at the first failure.
        while action_queue:
            taken_action = action_queue.pop(0)
            obs, reward, done, infos = env.step(taken_action)

            # Any observation containing the substring "coin" triggers 'take coin' and ends the run.
            if "coin" in obs:
                taken_action = "take coin"
                obs, reward, done, infos = env.step(taken_action)
                end_game = True
                print("Coin found!")
                break

            brief_obs = f"Action: {taken_action}\n{summarize_obs(obs)}\n"
            obs_queue.append(brief_obs)
            print(f"> {taken_action}")
            print(brief_obs)

            # Define action passed
            # Each branch below matches a failure message in the observation, stores feedback
            # for the next prompt and aborts the rest of the batch.
            if "You can't move there, the door is closed." in brief_obs:
                action_passed = False
                large_loop_error_message = f"This is the action you take: {taken_action}. \
                    The door that you are moving to is closed. \
                    You should first open door to that direction then move there!"
                break
            elif "That is already open." in brief_obs:
                action_passed = False
                large_loop_error_message = f"This is the action you take: {taken_action}. \
                    You try to open a door that is already open. You already visited here. Make sure the status of door is correct."
                break
            elif "I'm not sure what you mean." in brief_obs:
                action_passed = False
                # --TODO-- Error message:
                if "open door" in taken_action:
                    large_loop_error_message = f'This is the action you take: {taken_action}. \
                        When you try to open door, there is no door here or there is nothing in this direction.\
                        If there is no door, you can directly move to that direction.\n'
                elif "move" in taken_action:
                    large_loop_error_message = f'This is the action you take: {taken_action}. \
                        You cannot move to that direction. Review the predicate of your actions and the problem files to check the status.'
                else:
                    large_loop_error_message = f'This is the action you take: {taken_action}. \
                        You got the environment error!'
                break

            # Reached only when the action did not fail. Memory is updated per action, so the
            # earlier actions of a batch stay in overall_memory even if a later action of the
            # same batch fails and the environment is reset on the retry.
            overall_memory += brief_obs
            overall_memory_dic.append({"type": "action", "content": taken_action})
            overall_memory_dic.append({"type": "observation", "content": summarize_obs(obs)})

            # The whole batch ran without failure: record it for replay on later retries.
            if not action_queue:
                action_passed = True
                successful_actions.extend(tem_action_queue)

        # Give up after 5 failed attempts, or stop because the coin was found.
        if (within_step_tries == 5 and not action_passed) or end_game:
            end_game = True
            break

    if end_game:
        break

python(76989) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Observations: You are in the kitchen. In one part of the room you see a stove. There is also an oven. You also see a fridge that is closed. In another part of the room you see a counter, that has nothing on it. In one part of the room you see a kitchen cupboard that is closed. There is also a cutlery drawer that is closed. You also see a trash can that is closed. In another part of the room you see a dishwasher that is closed. In one part of the room you see a dining chair, that has nothing on it. 
To the South you see a closed patio door. To the West you see a closed plain door. 
Gold path: ['look around', 'open door to south', 'open door to west', 'move south', 'open door to west', 'move east', 'open door to north', 'move west', 'move east', 'move west', 'move east', 'move north', 'take coin']
Valid Actions: ['look around', 'close door to west', 'move west', 'open door to south', 'open door to west', 'inventory', 'move south', 'close door to south']
taskDescription: Your task is to s

python(76991) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


> open door to south
Action: open door to south
You open the wood door, revealing the laundry room. 

> move south
Action: move south
You are in the laundry room. Through an open wood door, to the North you see the bathroom. 

> look around
Action: look around
You are in the laundry room. Through an open wood door, to the North you see the bathroom. 

> open door to west
Action: open door to west
Unknown action: I'm not sure what you mean.

----Larger Loop No. 2----
Successful actions: ['open door to south', 'move south', 'look around', 'close door to west', 'open door to west', 'move west', 'open door to south', 'move south']


python(76992) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


> move north
Action: move north
You are in the living room. Through an open wood door, to the South you see the bathroom. Through an open patio door, to the East you see the backyard. 

> move north
Action: move north
Unknown action: I'm not sure what you mean.

----Larger Loop No. 3----
Successful actions: ['open door to south', 'move south', 'look around', 'close door to west', 'open door to west', 'move west', 'open door to south', 'move south']


python(76999) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


> move north
Action: move north
You are in the living room. Through an open wood door, to the South you see the bathroom. Through an open patio door, to the East you see the backyard. 

> open door to west
Action: open door to west
Unknown action: I'm not sure what you mean.

----Larger Loop No. 4----
Successful actions: ['open door to south', 'move south', 'look around', 'close door to west', 'open door to west', 'move west', 'open door to south', 'move south']


python(77006) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


> open door to west
Action: open door to west
You open the wood door, revealing the corridor. 

> move west
Action: move west
You are in the corridor. Through an open wood door, to the East you see the bathroom. To the West you see a closed wood door. 

> look around
Action: look around
You are in the corridor. Through an open wood door, to the East you see the bathroom. To the West you see a closed wood door. 

> open door to south
Action: open door to south
Unknown action: I'm not sure what you mean.



## CoinCollector

In [25]:
def file_to_path(domain_content, problem_content, domain_filename="domain.pddl", problem_filename="problem.pddl"):
    """Write the PDDL domain and problem text to disk and return their paths.

    Both files are opened in write mode, so existing files with the same
    names are overwritten. Relative filenames are resolved against the
    current working directory.

    Args:
        domain_content: PDDL domain text.
        problem_content: PDDL problem text.
        domain_filename: File name (or path) for the domain file.
        problem_filename: File name (or path) for the problem file.

    Returns:
        ``(path_to_df, path_to_pf)``: absolute paths of the two files that
        were just written.
    """
    with open(domain_filename, 'w') as domain_file:
        domain_file.write(domain_content)

    with open(problem_filename, 'w') as problem_file:
        problem_file.write(problem_content)

    # Report where the files really are instead of a path hard-coded to one
    # machine's checkout, which is only right when run from that directory.
    path_to_df = os.path.abspath(domain_filename)
    path_to_pf = os.path.abspath(problem_filename)

    return path_to_df, path_to_pf

def plan_to_path(plan, plan_filename="plan.txt"):
    """Write a plan to disk and return the path of the written file.

    Args:
        plan: Plan text, or ``None`` when there is no plan.
        plan_filename: File name (or path) for the plan file; relative names
            are resolved against the current working directory.

    Returns:
        The absolute path of the written plan file, or ``None`` when ``plan``
        is ``None`` (nothing is written in that case).
    """
    # Without a plan there is nothing to write; returning None lets callers
    # skip the plan argument instead of failing inside write().
    if plan is None:
        return None

    with open(plan_filename, 'w') as plan_file:
        plan_file.write(plan)

    # Return the file that was actually written: honour plan_filename and the
    # current working directory rather than a fixed machine-specific path.
    path_to_plan = os.path.abspath(plan_filename)

    return path_to_plan

In [26]:
import subprocess

def run_pddl_parser(domain_file, problem_file=None,
                    parser_path="/Users/krystalgong/Desktop/Harry/VAL-master/build/macos64/Release/bin/Parser"):
    """Run the VAL ``Parser`` executable on PDDL text and return its output.

    The PDDL text is first written to disk with ``file_to_path``; the parser
    is then invoked on the domain file, plus the problem file when one is
    given.

    Args:
        domain_file: PDDL domain text.
        problem_file: Optional PDDL problem text. When omitted (or empty) only
            the domain is parsed.
        parser_path: Path to the ``Parser`` executable. Defaults to the
            location used on the original author's machine.

    Returns:
        The parser's standard output, or ``None`` when the executable cannot
        be found or exits with a non-zero return code (a message is printed
        in both cases).
    """
    # file_to_path always writes both files, so pass an empty problem text
    # when none was supplied; the problem path is then simply not used.
    # Note that this overwrites the default problem file with empty content.
    problem_text = problem_file if problem_file is not None else ""
    domain_path, problem_path = file_to_path(domain_file, problem_text)

    # Check if both domain and problem files are provided
    if problem_file:
        command = [parser_path, domain_path, problem_path]
    else:
        command = [parser_path, domain_path]

    try:
        # Run the Parser and capture the output
        result = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError as e:
        print(f"Parser not found: {e}")
        return None

    # Check if there is any error
    if result.returncode != 0:
        print(f"Error: {result.stderr}")
        return None

    # Return the stdout (output) of the parser
    return result.stdout

df = """
(define (domain exploration)
  (:requirements :strips :typing :existential-preconditions :negative-preconditions)
  (:types location direction)
  (:predicates 
    (at ?loc - location)
    (connected ?loc1 ?loc2 - location)
    (door-open ?loc1 ?loc2 - location)
    (visited ?loc - location)
  )
  (:action open-door
    :parameters (?loc1 - location ?loc2 - location ?dir - direction)
    :precondition (and (connected ?loc1 ?loc2) (not (door-open ?loc1 ?loc2)))
    :effect (door-open ?loc1 ?loc2)
  )
  (:action move
    :parameters (?from - location ?to - location ?dir - direction)
    :precondition (and (at ?from) (connected ?from ?to) (door-open ?from ?to))
    :effect (and (not (at ?from)) (at ?to) (visited ?to))
  )
)
"""

pf = """(define (problem explore-environment)
  (:domain exploration)
  (:objects 
    backyard kitchen driveway street living_room - location
    north south east west - direction
  )
  (:init 
    (at living_room)
    (connected backyard kitchen)
    (connected backyard driveway)
    (connected backyard street)
    (connected backyard living_room)
    (connected living_room backyard)
    (door-open backyard kitchen)
    (door-open backyard living_room)
    (visited backyard)
    (visited living_room)
  )
  (:goal (exists (?loc - location) (and (at ?loc) (not (visited ?loc)))))
)
"""

# Run the parser on the domain/problem text defined above and capture its output
parser_output = run_pddl_parser(df, pf)
print(parser_output)

# run_pddl_parser returns None on failure; in that case (or for empty output)
# the error extraction below is skipped.
if parser_output:
    output_lst = parser_output.split('\n')
    error_message = ''
    error = False
    for i in output_lst:
        # Once a line starting with 'Errors:' is seen, keep that line and every
        # line after it (the `error` flag stays True from then on).
        if i.startswith('Errors:') or error:
            error_message += i
            error_message += '\n'
            error = True
    # Empty when the output contains no line starting with 'Errors:'.
    print(error_message)

python(92359) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


File: /Users/krystalgong/Documents/GitHub/pddlego-df/domain.pddl

(domain)
name: exploration
req: 4118
:strips :typing :existential-preconditions :negative-preconditions 
predicates: 
      (pred_decl)
      head: 
         (symbol)
         name: at
      args: 
         (typed_symbol_list<>)
            (symbol)
            name: loc[0x7f8852f065a0]

            type: 
               (symbol)
               name: location[0x7f8852f063c0]

               type: (NULL)
               either_types: (NULL)
            either_types: (NULL)
      (pred_decl)
      head: 
         (symbol)
         name: connected
      args: 
         (typed_symbol_list<>)
            (symbol)
            name: loc1[0x7f8852f06730]

            type: 
               (symbol)
               name: location[0x7f8852f063c0]

               type: (NULL)
               either_types: (NULL)
            either_types: (NULL)
            (symbol)
            name: loc2[0x7f8852f067a0]

            type: 
            

In [27]:
import subprocess

def validate_pddl(domain_file, problem_file, plan=None):
    """Run the VAL ``Validate`` executable on a domain/problem pair and print its report.

    Args:
        domain_file: Domain passed to ``file_to_path`` (callers in this notebook
            pass PDDL text).
        problem_file: Problem passed to ``file_to_path`` alongside the domain.
        plan: Optional plan passed to ``plan_to_path``. The plan is only handed
            to the validator when that helper returns a truthy path.

    Returns:
        None. The validator's output is printed.

    Raises:
        OSError: propagated from ``subprocess.run`` if the executable cannot be
            started (e.g. the hard-coded path does not exist on this machine).
    """
    # The path to the Validate executable
    validate_executable = "/Users/krystalgong/Desktop/Harry/VAL-master/build/macos64/Release/bin/Validate"

    domain_path, problem_path = file_to_path(domain_file, problem_file)
    plan_path = plan_to_path(plan)

    # Construct the command; the plan file is an optional trailing argument.
    command = [validate_executable, "-v", domain_path, problem_path]
    if plan_path:
        command.append(plan_path)

    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        print("Validation Output:\n", result.stdout)
    except subprocess.CalledProcessError as e:
        # A non-zero exit status does not mean stdout is empty: show whatever
        # report was written there as well, instead of only stderr.
        if e.stdout:
            print("Validation Output:\n", e.stdout)
        print("Error:\n", e.stderr)

# Example domain used to exercise validate_pddl: a `move` action that requires
# the door between two connected locations not to be closed, and an
# `open_door` action that removes the closed_door fact.
df = """(define (domain environment)
  (:requirements :strips :typing :negative-preconditions :disjunctive-preconditions)
  (:types
    location
    direction
  )
  (:predicates
    (at ?loc - location)
    (visited ?loc - location)
    (connected ?loc1 - location ?loc2 - location ?dir - direction)
    (closed_door ?loc1 - location ?loc2 - location)
  )
  (:action move
    :parameters (?loc1 - location ?loc2 - location ?dir - direction)
    :precondition (and (at ?loc1) (connected ?loc1 ?loc2 ?dir) (not (closed_door ?loc1 ?loc2)))
    :effect (and (not (at ?loc1)) (at ?loc2))
  )
  (:action open_door
    :parameters (?loc1 - location ?loc2 - location)
    :precondition (and (at ?loc1) (closed_door ?loc1 ?loc2))
    :effect (not (closed_door ?loc1 ?loc2))
  )
)"""

# Matching problem: the agent starts in the (visited) kitchen, separated from
# the unvisited location l1 by a closed door; the goal is to be at any
# location that has not been visited.
pf = """(define (problem exploration)
  (:domain environment)
  (:objects
    kitchen l1 - location
    north south east west - direction
  )
  (:init
    (at kitchen)
    (visited kitchen)
    (connected kitchen l1 south)
    (connected l1 kitchen north)
    (closed_door kitchen l1)
    (closed_door l1 kitchen)
  )
  (:goal 
    (exists (?x - location)
        (and
            (not (visited ?x))
            (at ?x)
        )
    )
  )
)"""


# Validate a two-step plan (open the door, then move through it) against the
# domain and problem above.
validate_pddl(df, pf,"(open_door kitchen l1)\n(move kitchen l1 south)")

python(92380) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Validation Output:
 Type-checking move
...action passes type checking.
Type-checking open_door
...action passes type checking.
Checking plan: /Users/krystalgong/Documents/GitHub/pddlego-df/plan.txt
Plan to validate:

Plan size: 2
1:
(open_door kitchen l1)
 
2:
(move kitchen l1 south)
 

Plan Validation details
-----------------------

Checking next happening (time 1)
Deleting (closed_door kitchen l1)

Checking next happening (time 2)
Deleting (at kitchen)
Adding (at l1)
Plan executed successfully - checking goal
Plan valid
Final value: 2 

Successful plans:
Value: 2
 /Users/krystalgong/Documents/GitHub/pddlego-df/plan.txt 2 




In [34]:
def error_message(domain_file, problem_file):
    """Return the error section of the PDDL parser output for the given files.

    The parser output is scanned line by line. Collection starts at the first
    line beginning with 'Errors:' and continues to the end of the output; any
    candidate line containing "Warning" is left out. The local repository
    prefix is stripped from the result so that only relative file names remain.
    An empty string is returned when the parser produced no output or no
    'Errors:' line.
    """
    raw_output = run_pddl_parser(domain_file, problem_file)
    collected = []
    if raw_output:
        in_error_section = False
        for row in raw_output.split('\n'):
            # Lines before the 'Errors:' marker are not part of the report.
            if not (in_error_section or row.startswith('Errors:')):
                continue
            # Warning lines are dropped and never open the error section.
            if "Warning" in row:
                continue
            in_error_section = True
            collected.append(row + '\n')

    report = ''.join(collected)
    return report.replace('/Users/krystalgong/Documents/GitHub/pddlego-df/', '')

In [29]:
# TODO: legitimately repeated actions (e.g. west, west, west...) should not be flagged as duplicates.
def detect_duplicates(action_lst, threshold):
    """Report whether the action list ends in a sequence repeated back to back.

    For every window width from 1 up to half the list length, the last `width`
    actions are taken as the candidate sequence, and the preceding windows of
    the same width are compared against it. Returns True as soon as some
    candidate occurs `threshold` times in a row at the end of the list,
    otherwise False.
    """
    total = len(action_lst)

    for width in range(1, total // 2 + 1):
        tail = action_lst[-width:]

        # The tail itself is the first occurrence; walk backwards window by
        # window until a mismatch or until `threshold` windows were checked.
        repeats = 1
        window = 2
        while window <= threshold and action_lst[-window * width: -(window - 1) * width] == tail:
            repeats += 1
            window += 1

        if repeats >= threshold:
            return True

    return False

# Quick check: the list ends with ("move", "collect-coin") three times in a
# row, so with a threshold of 3 the call below evaluates to True.
actions = ["move", "collect-coin", "move", "collect-coin", 'help',"move", "collect-coin", "move", "collect-coin", "move", "collect-coin"]
threshold = 3
detect_duplicates(actions, threshold)

True

In [30]:
def _as_edit_dict(edit_json):
    """Return `edit_json` as a dict, decoding it first when it is a JSON string.

    `None` and blank strings mean "no edits" and yield an empty dict.
    """
    if edit_json is None:
        return {}
    if isinstance(edit_json, str):
        text = edit_json.strip()
        return json.loads(text) if text else {}
    return edit_json


def _join_fragments(fragments):
    """Turn a replacement given as a list of fragments (or one string) into PDDL text."""
    if isinstance(fragments, str):
        return fragments
    return " ".join(fragments)


def _consume_expression(first_line, lines):
    """Advance `lines` past the remainder of a parenthesised expression.

    `first_line` is the line already read (it may be empty when the expression
    starts on a following line). Lines are pulled from the iterator until at
    least one "(" has been seen and every opened parenthesis is closed again.

    Returns the number of surplus ")" found on the consumed text, i.e. closers
    that belong to an enclosing form and must be re-emitted by the caller.
    """
    depth = first_line.count("(") - first_line.count(")")
    opened = "(" in first_line
    while depth > 0 or not opened:
        following = next(lines, None)
        if following is None:  # ran off the end of the file
            break
        opened = opened or "(" in following
        depth += following.count("(") - following.count(")")
    return max(0, -depth)


def _append_additions(section_edits, indent, output):
    """Append every entry of the section's "add" list to `output`."""
    for item in section_edits.get("add") or []:
        output.append(indent + item)


def _edit_entry(line, section_edits, indent, output):
    """Apply the section's "replace"/"delete" edits to one entry line."""
    key = line.strip()
    if key in (section_edits.get("replace") or {}):
        output.append(indent + section_edits["replace"][key])
    elif key in (section_edits.get("delete") or []):
        return
    else:
        output.append(line)


def _replace_clause(line, lines, clause, edits, output):
    """Replace an action's `:precondition`/`:effect` clause when an edit exists for it."""
    replacement = edits.get(clause)
    if not replacement:
        # Missing or empty replacement means "leave the clause as it is".
        output.append(line)
        return
    # Drop the continuation lines of a multi-line clause, keeping any closer
    # that belonged to the enclosing action.
    surplus = _consume_expression(line, lines)
    output.append("        :" + clause + " " + _join_fragments(replacement) + ")" * surplus)


def apply_edit_domain(prev_df, edit_json):
    """Apply an edit specification to a PDDL domain file and return the new text.

    Args:
        prev_df: The current domain file as a string.
        edit_json: Edit specification (dict, or JSON string) with the optional keys
            "predicates" ({"add": [...], "replace": {old: new}, "delete": [...]})
            and "action" ({action_name: {"precondition": [...], "effect": [...]}}).
            Predicate entries are matched against whole stripped lines.

    Returns:
        The edited domain text. Sections without edits are passed through
        unchanged.

    Note:
        The predicates section is expected to have "(:predicates" and its
        closing ")" on their own lines.
    """
    edit_json = _as_edit_dict(edit_json)
    predicate_edits = edit_json.get("predicates") or {}
    action_edits = edit_json.get("action") or {}

    output = []
    predicate_section = False
    action_name = None

    # An explicit iterator lets the clause handlers consume continuation lines.
    lines = iter(prev_df.split("\n"))
    for line in lines:
        stripped_line = line.strip()

        # Handle predicates
        if "(:predicates" in line:
            predicate_section = True
            output.append(line)
        elif predicate_section and stripped_line == ")":
            predicate_section = False
            _append_additions(predicate_edits, "    ", output)
            output.append(line)
        elif predicate_section:
            _edit_entry(line, predicate_edits, "    ", output)

        # Handle actions
        elif "(:action" in line:
            action_name = stripped_line.split()[1]
            output.append(line)
        elif action_name and ":precondition" in stripped_line:
            _replace_clause(line, lines, "precondition", action_edits.get(action_name) or {}, output)
        elif action_name and ":effect" in stripped_line:
            _replace_clause(line, lines, "effect", action_edits.get(action_name) or {}, output)
        else:
            output.append(line)

    return "\n".join(output)


def apply_edit_problem(prev_pf, edit_json):
    """Apply an edit specification to a PDDL problem file and return the new text.

    Args:
        prev_pf: The current problem file as a string.
        edit_json: Edit specification (dict, or JSON string) with the optional keys
            "objects" and "init" (each {"add": [...], "replace": {old: new},
            "delete": [...]}, matched against whole stripped lines) and "goal"
            (list of fragments, or a string, forming the complete new goal).
            An empty "goal" leaves the existing goal untouched.

    Returns:
        The edited problem text.

    Note:
        The objects and init sections are expected to have their opening
        keyword and closing ")" on their own lines.
    """
    edit_json = _as_edit_dict(edit_json)
    object_edits = edit_json.get("objects") or {}
    init_edits = edit_json.get("init") or {}
    new_goal = edit_json.get("goal")

    output = []
    obj_section = False
    init_section = False

    lines = iter(prev_pf.split("\n"))
    for line in lines:
        stripped_line = line.strip()

        # Handle objects
        if "(:objects" in line:
            obj_section = True
            output.append(line)
        elif obj_section and stripped_line == ")":
            obj_section = False
            _append_additions(object_edits, "        ", output)
            output.append(line)
        elif obj_section:
            _edit_entry(line, object_edits, "        ", output)

        # Handle init
        elif "(:init" in line:
            init_section = True
            output.append(line)
        elif init_section and stripped_line == ")":
            init_section = False
            _append_additions(init_edits, "        ", output)
            output.append(line)
        elif init_section:
            _edit_entry(line, init_edits, "        ", output)

        # Handle goal
        elif "(:goal" in line and new_goal:
            head, _, rest = line.partition("(:goal")
            goal_text = "        " + _join_fragments(new_goal)
            if rest.strip():
                # The old goal starts on the header line itself: drop everything up
                # to the parenthesis closing "(:goal" and rebuild the form.
                surplus = _consume_expression(line, lines)
                output.append(head + "(:goal")
                output.append(goal_text)
                output.append(head + ")" * (1 + surplus))
            else:
                # Header on its own line: drop the (possibly multi-line) old goal
                # expression that follows and put the new one in its place.
                output.append(line)
                surplus = _consume_expression("", lines)
                output.append(goal_text + ")" * surplus)

        else:
            output.append(line)

    return "\n".join(output)


def apply_edit(prev_df, prev_pf, edit_json_df, edit_json_pf):
    """Apply domain and problem edits, returning `(updated_df, updated_pf)`.

    Each edit may be a dict or a JSON string. A file whose edit is empty is
    returned unchanged.
    """
    edit_json_df = _as_edit_dict(edit_json_df)
    edit_json_pf = _as_edit_dict(edit_json_pf)

    update_df, update_pf = prev_df, prev_pf  # Default to original if no changes are made
    if edit_json_df:
        update_df = apply_edit_domain(prev_df, edit_json_df)
    if edit_json_pf:
        update_pf = apply_edit_problem(prev_pf, edit_json_pf)
    return update_df, update_pf


In [31]:
def llm_to_pddl_check_delta(obs, taken_action, prev_df="", prev_pf=""):
    """Ask the LLM whether the latest observation requires edits to the PDDL files.

    Args:
        obs: Raw observation returned by the environment after `taken_action`.
        taken_action: The action text that was just executed.
        prev_df: Previously generated domain file.
        prev_pf: Previously generated problem file.

    Returns:
        `(True, 0, 0)` when the problem-file edit returned by the LLM is empty
        (either `{}` or the all-empty template), otherwise
        `(False, df, pf)` with the files produced by `apply_edit`.

    Raises:
        json.JSONDecodeError: if the LLM output is not valid JSON.
    """
    # Plain (non-f) string: braces are written singly so the model sees valid JSON.
    prompt_edit = """
        Please provide the edit output in JSON format, including the edit suggestions for a domain file as 'df' and the edit suggestions for a problem file as 'pf'. 
        The output format should be: {"df": "...", "pf": "..."}
        You will modify the following df and pf using add, delete, and replace operations (in a JSON format). 
        You SHOULD NOT provide a domain file and a problem file directly.
        If you think the current observation is correct with your previous generated files, then provide empty JSON: 
        {"df": "{}", "pf": "{}"}
        This is the structure for df edit file if you think this observation is different from previous generated version, remember to add bracket:
        {
        "predicates": {
            "add": ["(predicates to add)"],
            "replace": {"(old)": "(new)"},
            "delete": ["(predicates to delete)"]
            },
        "action": {
            "open-door": {
                "precondition": ["(entire full new precondition for open-door)"], # directly replace the whole precondition
                "effect": ["(entire full new effect for open-door)"] # so as effect
                },
            "move": {
                "precondition": [],
                "effect": []
                }
            }
        }
        This is the structure for pf edit file:
        {
        "objects": {
            "add": [],
            "replace": {},
            "delete": []
            },
        "init": {
            "add": [],
            "replace": {},
            "delete": []
            },
        "goal": ["(entire full new goal)"]
        }
    """

    # Summarize once and reuse the result for both the prompt and the checks below.
    obs_summary = summarize_obs(obs)

    prompt_obs_action = f"""
        Background: You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Your task is always to keep exploration and go to a location you have not visited yet.

        Here is your last action {taken_action} and the observation after taking that action: {obs_summary}
    """ 

    prompt_prev_files = f"""
        This is previous domain file: {prev_df}
        This is previous problem file: {prev_pf}
    """

    prompt = prompt_edit + prompt_obs_action + prompt_prev_files

    # Extra guidance for door-opening actions, depending on how the environment reacted.
    if "I'm not sure what you mean." in obs_summary and "open door" in taken_action:
        print('\n There is no door here or there is nothing in this direction.') # how to utilize this? previous obs. how to extract locations
        prompt += 'Additionally notes: You are trying to open a door but there is no door here or there is nothing in this direction.'
    elif "open door" in taken_action:
        prompt += "\n Additionally notes: You opened a door and revealing the above place. \
            Is this what you are expecting based on your previous generated problem file? \
            If yes, you should generate the empty edit json file! \
            If not, do you need to edit the previous file, mainly problem file? Provide the edit json! Thank you!"

    edit_json_df, edit_json_pf = run_gpt(prompt)

    print(edit_json_df, edit_json_pf)

    edit_json_df = json.loads(edit_json_df)
    edit_json_pf = json.loads(edit_json_pf)

    # A problem-file edit equal to this template carries no change at all.
    zero_edit = {
        "objects": {
            "add": [],
            "replace": {},
            "delete": []
            },
        "init": {
            "add": [],
            "replace": {},
            "delete": []
            },
        "goal": []
        }
    if edit_json_pf == zero_edit or edit_json_pf == {}:
        return True, 0, 0

    df, pf = apply_edit(prev_df, prev_pf, edit_json_df, edit_json_pf)

    # err = error_message(df, pf)
    # check err and its df & pf here:
    # ....
    return False, df, pf

### Start simulating

In [57]:
from textworld_express import TextWorldExpressEnv

# Create the TextWorldExpress environment with a step limit of 100.
env = TextWorldExpressEnv(envStepLimit=100)

# Load the "coin" game with 11 locations, doors enabled and no distractor items.
NUM_LOCATIONS = 11
env.load(gameName="coin", gameParams=f"numLocations={NUM_LOCATIONS},numDistractorItems=0,includeDoors=1,limitInventorySize=0")

# Fixed seed and fold; the gold path is requested so it can be printed below.
obs, infos = env.reset(seed=1, gameFold="train", generateGoldPath=True)

print("Observations: "+obs)
print("Gold path: " + str(env.getGoldActionSequence()))
print("Valid Actions: " + str(infos['validActions']))
print("taskDescription: " + str(infos['taskDescription']))
task_description = infos['taskDescription']
# `valid_actions` is read as a global by llm_to_pddl when it builds its prompt.
# 'look around' and 'inventory' are removed from the list before it is used
# (list.remove raises ValueError if either entry is absent).
valid_actions = sorted(infos['validActions'])
valid_actions.remove('look around')
valid_actions.remove('inventory')

def llm_to_pddl(brief_obs, prev_df="", prev_pf="", prev_err="", have_error=False, have_duplicate=False, edit=False, overall_memory=None, large_loop_error_message = None):
    """Build a prompt from the current observations and ask the LLM for PDDL files.

    Args:
        brief_obs: Text of the latest action(s) and observation(s).
        prev_df: Previously generated domain file ("" on the first call).
        prev_pf: Previously generated problem file ("" on the first call).
        prev_err: Parser error text from the previous attempt.
        have_error: When True (and previous files exist) the parser-error
            instructions are added instead of the "new observation" ones.
        have_duplicate: When True a note about repeated action sequences is added.
        edit: When True the LLM is asked for JSON edits, which are applied to the
            previous files with `apply_edit`; otherwise it returns full files.
        overall_memory: Accumulated action/observation history for the prompt.
        large_loop_error_message: Error text from executing the last plan in the
            environment; only used when previous files exist.

    Returns:
        `(df, pf, err, prompt)`: the new domain and problem files, the parser
        error text from `error_message(df, pf)`, and the prompt that was sent.

    Note:
        The list of valid actions is read from the global `valid_actions`.
    """
    prompt_format = f"""
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {{
            "df": "...",
            "pf": "..."
            }}
    """

    # Plain (non-f) string: braces are written singly so the model sees valid JSON.
    prompt_edit = """
        Please provide the output in JSON format, including the edit suggestions for a domain file as 'df' and the edit suggestions for a problem file as 'pf'. 
        The output format should be: {"df": "...", "pf": "..."}
        You will modify the following df and pf using add, delete, and replace operations (in a JSON format). 
        You SHOULD NOT provide a domain file and a problem file directly.
        This is the structure for df edit file, remember to add bracket:
        {
        "predicates": {
            "add": ["(predicates to add)"],
            "replace": {"(old)": "(new)"},
            "delete": ["(predicates to delete)"]
            },
        "action": {
            "open-door": {
                "precondition": ["(entire full new precondition for open-door)"], # directly replace the whole precondition
                "effect": ["(entire full new effect for open-door)"] # so as effect
                },
            "move": {
                "precondition": [],
                "effect": []
                }
            }
        }
        This is the structure for pf edit file:
        {
        "objects": {
            "add": [],
            "replace": {},
            "delete": []
            },
        "init": {
            "add": [],
            "replace": {},
            "delete": []
            },
        "goal": ["(entire full new goal)"]
        }
    """

    prompt_obs_action = f"""
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here are your current observations: {brief_obs}
        Here are some valid actions you can take: {valid_actions}
        You should generate df and pf strictly follow this valid actions. There are in total 2 actions, that should exactly be the following two:
        1. :action open-door
            :parameters (?loc1 - location ?loc2 - location ?dir - direction)
        2. :action move
            :parameters (?from - location ?to - location ?dir - direction)
        You should have a goal in the problem file like this: 
        (:goal 
            (at ?location)
        ) where location should be somewhere not visited
        Note: in problem file's init, you shouldn't have "not ()" but only the single status
    """ 

    prompt_prev_files = f"""
        This is previous domain file: {prev_df}
        This is previous problem file: {prev_pf}
        This is all the memory you have in this game including each action and its corresponding observations: {overall_memory}
    """

    prompt_new_obs = f"""
        Now modify those two files according to the new observations and notes. Fix any errors you made in the previous setting according to the new observation.
        Generate updated files based on your new observation.
    """

    # error from Parser(df, pf)
    prompt_error_parser = f"""
        You made some mistakes when generating those files. Here is the error message: {prev_err}
        Now modify those two files according to the error message.
    """

    # error from simulation environment
    prompt_simulation_error = f"""
        You have already generate files according to the observations. The df and pf can generate actions but after simulating,
        it got those errors: {large_loop_error_message}. Please review both files and fix them.
        Now modify those two files according to the error message.
    """

    prompt_duplicate_note = """
        You are repeating the same sequence of actions for at least three times. You may stuck in one location or have the wrong goal.
        You should revise your problem file to avoid the repeat.
        Remember your goal is always to keep exploration and go to a location you have not visited yet, i.e. your goal should be go to other not visited location but shouldn't be at one fixed location.
    """

    # Output-format instructions come first: full files or JSON edits.
    if not edit:
        prompt = prompt_format
    else:
        prompt = prompt_edit

    # all prompts should have observations and actions
    prompt += prompt_obs_action

    # Context about earlier attempts is only meaningful once both files exist.
    if prev_df and prev_pf:
        prompt += prompt_prev_files

        if not have_error:
            prompt += prompt_new_obs
        else:
            prompt += prompt_error_parser

        if large_loop_error_message:
            prompt += prompt_simulation_error

    if have_duplicate:
        print('You have duplicated error message!!')
        prompt += prompt_duplicate_note


    if edit:
        edit_json_df, edit_json_pf = run_gpt(prompt)
        # print("Edit json:",edit_json_df, edit_json_pf)
        df, pf = apply_edit(prev_df, prev_pf, edit_json_df, edit_json_pf)
        # print("New df and pf:", df, pf)
    else:
        df, pf = run_gpt(prompt)

    err = error_message(df, pf)
    # check err and its df & pf here:
    # ....
    return df, pf, err, prompt

def get_action_from_pddl(df, pf):
    """Solve the given domain/problem with the "dual-bfws-ffparser" solver.

    The plan found under result['output']['plan'] is passed through
    `map_actions`, whose return value is handed back to the caller.
    """
    solver_result = run_solver(df, pf, "dual-bfws-ffparser")
    return map_actions(solver_result['output']['plan'])

python(99210) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Observations: You are in the kitchen. In one part of the room you see a stove. There is also an oven. You also see a fridge that is closed. In another part of the room you see a counter, that has nothing on it. In one part of the room you see a kitchen cupboard that is closed. There is also a cutlery drawer that is closed. You also see a trash can that is closed. In another part of the room you see a dishwasher that is closed. In one part of the room you see a dining chair, that has nothing on it. 
To the South you see a closed patio door. To the West you see a closed plain door. 
Gold path: ['look around', 'open door to south', 'open door to west', 'move south', 'open door to west', 'move east', 'open door to north', 'move west', 'move north', 'move west', 'move east', 'move west', 'move east', 'move west', 'move east', 'move west', 'move east', 'move west', 'move east', 'move south', 'move north', 'move south', 'move south', 'move north', 'move south', 'move north', 'move east', 'mov

In [58]:
# 1/12/25 working on
import sys

# Upper bound on the number of outer exploration steps.
MAX_STEPS = 20

brief_obs = "Action: look around\n" + summarize_obs(obs)+'\n' # initial definition
print(brief_obs)

# State shared across steps:
#   action_queue       - actions of the current plan still waiting to be executed
#   obs_queue          - action/observation texts gathered since the last LLM call
#   df / pf            - latest generated domain and problem files
#   all_actions        - every planned action so far (input to detect_duplicates)
#   successful_actions - actions of fully executed plans, replayed after an env reset
action_queue = []
obs_queue = []
df = ""
pf = ""
all_actions = []
successful_actions = []
edit = False
end_game = False

# History of actions and observations handed to the LLM as "memory".
overall_memory = brief_obs

for step_id in range(0, MAX_STEPS):
    print(f"\n====Step {step_id}====")

    within_step_tries = 0
    action_passed = False

    # Feedback from executing the last plan in the environment; reset every step.
    large_loop_error_message = ""

    # Under step#, it should repeat until run all actions and found no error
    while within_step_tries < 5 and not action_passed:
        print(f'----Larger Loop No. {within_step_tries}----')
        print(f'successful_actions: {successful_actions}')
        within_step_tries += 1

        if within_step_tries > 1: # second or third ... time in the larger loop
            # reset env by refilling successful actions (stupid but useful)
            env = TextWorldExpressEnv(envStepLimit=100)
            NUM_LOCATIONS = 11
            env.load(gameName="coin", gameParams=f"numLocations={NUM_LOCATIONS},numDistractorItems=0,includeDoors=1,limitInventorySize=0")
            obs, infos = env.reset(seed=1, gameFold="train", generateGoldPath=True)
            for successful_action in successful_actions:
                obs, reward, done, infos = env.step(successful_action)

        action_queue = [] # reset action_queue
        # Actions and observations of this attempt; only committed to
        # successful_actions / overall_memory when the whole plan runs cleanly.
        tem_action_queue = []
        tem_memory = ""

        # start_checkpoint forces one pass through the loop even though the
        # queue starts out empty.
        start_checkpoint = True
        while start_checkpoint or action_queue:
            print(f'Small Loop, action_queue: {action_queue}')
            start_checkpoint = False

            # No pending actions: generate new PDDL files and ask the solver for a plan.
            if not action_queue:
                if obs_queue:
                    brief_obs = "\n".join(obs_queue)
                    obs_queue = []
                action = ""

                if not df and not pf: # First step no need duplicates detection
                    num_tries = 0
                    df, pf, err, prompt = llm_to_pddl(brief_obs) # error 1 here
                    action = get_action_from_pddl(df, pf) # error 2 here
                    print("\n--Small Loop--",num_tries)
                    print("Error:", err)
                    print("Prompt:", prompt)
                    print("df and pf:", df, pf)
                    print("Actions from solver(df, pf)", action)

                    # Retry up to 5 times with the parser error fed back
                    # (have_error=True) while the solver returns no plan.
                    while not action and num_tries < 5:
                        df, pf, err, prompt = llm_to_pddl(brief_obs, df, pf, err, True, False, edit)
                        action = get_action_from_pddl(df, pf)
                        num_tries += 1
                        print("\n--Small Loop--",num_tries)
                        print("Error:", err)
                        print("Prompt:", prompt)
                        print("df and pf:", df, pf)
                        print("Actions from solver(df, pf)", action)
                else:
                    num_tries = 0
                    # Every time read new error message from larger loop
                    # In llm_to_pddl, detect if new large loop error message exists
                    # NOTE: `err` is the value left over from the previous llm_to_pddl call.
                    df, pf, err, prompt = llm_to_pddl(brief_obs, df, pf, err, False, detect_duplicates(all_actions, 3), edit, overall_memory, large_loop_error_message) # need to add new error message
                    action = get_action_from_pddl(df, pf)
                    print("\n--Small Loop--",num_tries)
                    print("Error:", err)
                    print("Prompt:", prompt)
                    print("df and pf:", df, pf)
                    print("Actions from solver(df, pf)", action)

                    # Same retry scheme as above, now including memory, the
                    # duplicate check and the environment error message.
                    while not action and num_tries < 5:
                        df, pf, err, prompt = llm_to_pddl(brief_obs, df, pf, err, True, detect_duplicates(all_actions, 3), edit, overall_memory, large_loop_error_message)
                        action = get_action_from_pddl(df, pf)
                        num_tries += 1
                        print("\n--Small Loop--",num_tries)
                        print("Error:", err)
                        print("Prompt:", prompt)
                        print("df and pf:", df, pf)
                        print("Actions from solver(df, pf)", action)

                if action:
                    action_queue.extend(action)
                    tem_action_queue.extend(action) # temporary action queue to put in successful_actions
                    all_actions.extend(action) # to detect duplicated
                else:
                    # No plan even after the retries: give up on the whole run.
                    end_game = True
                    break

            print("Current action_queue:", action_queue)

            # Execute the next planned action in the environment.
            taken_action = action_queue.pop(0)
            # Feedback from plan-environment interaction
            # err_validate = validate_pddl(df, pf, taken_action)
            # print(err_validate)

            obs, reward, done, infos = env.step(taken_action)

            # Directly end the game if found coin
            if "coin" in obs:
                taken_action = "take coin"
                obs, reward, done, infos = env.step(taken_action)
                end_game = True
                print('Coin found!')
                break

            action_text = "Action: " + taken_action + "\n"
            obs_text = summarize_obs(obs) + "\n"

            brief_obs = action_text + obs_text

            obs_queue.append(brief_obs)
            print(">", taken_action)
            print(brief_obs)
            # =====
            # Define action passed
            # Each branch below records feedback for the next LLM call and
            # abandons the current plan; action_passed stays False, so the
            # larger loop resets the environment and tries again.
            if "You can't move there, the door is closed." in brief_obs:
                large_loop_error_message = f"This is the action you take: {taken_action}. \
                    The door that you are moving to is closed. \
                    You should first open door to that direction then move there!"
                break
            elif "That is already open." in brief_obs:
                large_loop_error_message = f"This is the action you take: {taken_action}. \
                    You try to open a door that is already open. You already visited here. Make sure the status of door is correct."
                break
            elif "I'm not sure what you mean." in brief_obs:
                action_passed = False
                print('===error message here!', obs_text)
                if "open door" in taken_action:
                    large_loop_error_message = f'This is the action you take: {taken_action}. \
                        When you try to open door, there is no door here or there is nothing in this direction.\
                        If there is no door, you can directly move to that direction.\n'
                elif "move" in taken_action:
                    large_loop_error_message = f'This is the action you take: {taken_action}. \
                        You cannot move to that direction. Review the predicate of your actions and the problem files to check the status.'
                else:
                    large_loop_error_message = f'This is the action you take: {taken_action}. \
                        You got the environment error!'

                break

            # append into overall memory and dictionary format
            tem_memory += brief_obs

            # It should be the last step and passed all actions
            if not action_queue:
                action_passed = True
                successful_actions.extend(tem_action_queue)
                overall_memory += tem_memory

        # Stop when the step used all 5 attempts without success, or when the
        # game was ended inside the small loop (coin found or no plan).
        if (within_step_tries == 5 and not action_passed) or end_game:
            end_game = True
            break

    if end_game:
        break

Action: look around
You are in the kitchen. To the South you see a closed patio door. To the West you see a closed plain door. 


====Step 0====
----Larger Loop No. 0----
successful_actions: []
Small Loop, action_queue: []


python(99216) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 0


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a

python(99228) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 0


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a

python(99253) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 1


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a

python(99282) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 2


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a

python(99284) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Small Loop, action_queue: []


python(99294) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 0


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a

python(99296) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Small Loop, action_queue: []
You have duplicated error message!!


python(99299) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 0


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a

python(99300) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 1


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a

python(99301) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 2


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a

python(99303) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 3


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a

python(99304) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 4


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a

python(99306) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



--Small Loop-- 5


Prompt: 
        Please provide the output in strict JSON format, without any additional text or explanation, including a PDDL domain file as 'df' and a PDDL problem file as 'pf'. 
        The format should strictly be:
            {
            "df": "...",
            "pf": "..."
            }
    
        You are in an environment that you explore step by step. You must build and update PDDL files of the environment based on only your observations. 
        Do not create something not appeared in the observations and also do not miss any observations e.g. through closed doors you may assume a room behind.
        Do not assume that there will be a door connecting rooms.
        Your task is always to keep exploration and go to a location you have not visited yet.
        In other words, your goal should go to other not visited location.
        If you enter a room, make sure you put everything you observed such as the direction in the problem file.
        Here a