In [16]:
import re
import json
import time
import numpy as np
from typing import Callable, Optional, Tuple, Dict, Any, List

from scienceworld import ScienceWorldEnv
import textworld.gym
from textworld import EnvInfos

from alfworld.agents.environment import get_environment
import alfworld.agents.modules.generic as generic

# ScienceWorld

In [2]:
# Pretty-print helpers (optional)
def hr(title: Optional[str] = None):
    """Print a horizontal rule, optionally followed by a titled header.

    A blank line and a 90-dash rule are always printed. When ``title`` is
    truthy, the title and a second 90-dash rule are printed beneath it.

    Args:
        title: Header text to show under the rule; ``None`` or ``""`` prints
            the rule alone.
    """
    rule = "-" * 90
    print("\n" + rule)
    if not title:
        return
    print(title)
    print(rule)

def pkey(d: Dict[str, Any], key: str, label: Optional[str] = None):
    """Print ``"<label>: <value>"`` for ``key`` if it is present in ``d``.

    Args:
        d: Mapping to look the key up in.
        key: Key whose value should be printed.
        label: Text shown before the colon; falls back to ``key`` when the
            label is ``None`` or empty.

    Nothing is printed when ``key`` is absent from ``d``.
    """
    if key not in d:
        return
    caption = label if label else key
    print(f"{caption}: {d[key]}")

# Environment bootstrap (works with or without a custom JAR)
def init_env(jar_path: Optional[str] = None, env_step_limit: int = 100) -> ScienceWorldEnv:
    """
    Construct a ScienceWorldEnv with an empty task name.

    jar_path: path to a custom JAR; if None or "", an empty string is passed
        through and ScienceWorldEnv uses the built-in jar.
    env_step_limit: forwarded to ScienceWorldEnv as ``envStepLimit``.
    """
    server_jar = jar_path if jar_path else ""
    return ScienceWorldEnv("", server_jar, envStepLimit=env_step_limit)

def list_tasks(env: ScienceWorldEnv) -> List[str]:
    """
    Print every task name reported by ``env.get_task_names()`` as
    "<index>: <name>" (index right-aligned to width 2) and return the names.
    """
    names = env.get_task_names()
    for position, name in enumerate(names):
        line = f"{position:2d}: {name}"
        print(line)
    return names

def load_task(
    env: ScienceWorldEnv,
    task_name: str,
    var_num: int = 0,
    simplifications: Optional[str] = "easy",
    generate_gold_path: bool = True
):
    """
    Load a task variation into ``env``, reset it, and return ``(obs, info)``.

    task_name: name of the task to load (see ``list_tasks``).
    var_num: variation number passed to ``env.load``.
    simplifications: e.g., "easy" or a comma-joined string like "teleportAction,openDoors,selfWateringFlowerPots,noElectricalAction";
        None or "" is forwarded as an empty string.
    generate_gold_path: forwarded to ``env.load`` as ``generateGoldPath``.
    """
    simplification_str = simplifications if simplifications else ""
    env.load(task_name, var_num, simplification_str, generateGoldPath=generate_gold_path)
    initial_obs, initial_info = env.reset()
    return initial_obs, initial_info

In [3]:
# Boot ScienceWorld without a custom JAR path (so the built-in one is used) and a
# 100-step episode cap, then print the indexed task list for use in later cells.
env = init_env(
    env_step_limit=100,
    jar_path=None,
)
tasks = list_tasks(env)

 0: boil
 1: change-the-state-of-matter-of
 2: chemistry-mix
 3: chemistry-mix-paint-secondary-color
 4: chemistry-mix-paint-tertiary-color
 5: find-animal
 6: find-living-thing
 7: find-non-living-thing
 8: find-plant
 9: freeze
10: grow-fruit
11: grow-plant
12: identify-life-stages-1
13: identify-life-stages-2
14: inclined-plane-determine-angle
15: inclined-plane-friction-named-surfaces
16: inclined-plane-friction-unnamed-surfaces
17: lifespan-longest-lived
18: lifespan-longest-lived-then-shortest-lived
19: lifespan-shortest-lived
20: measure-melting-point-known-substance
21: measure-melting-point-unknown-substance
22: melt
23: mendelian-genetics-known-plant
24: mendelian-genetics-unknown-plant
25: power-component
26: power-component-renewable-vs-nonrenewable-energy
27: test-conductivity
28: test-conductivity-of-unknown-substances
29: use-thermometer


In [4]:
# # Choose task to load / play around with
# task_idx = 6
# task_name = tasks[task_idx]
# print(f"Loading task #{task_idx}: {task_name}.")

# # Load task and print initial observation
# obs, info = load_task(env, task_name=task_name, var_num=1, simplifications="easy", generate_gold_path=True)
# print(f"Task: {env.get_task_description()}")
# print(f"Goal Progress: {env.get_goal_progress()}")
# gold_actions = env.get_gold_action_sequence()
# print(gold_actions)

# for idx, a in enumerate(gold_actions):
#     print(f"Step {idx+1}:\n{obs}")
#     print(f"Action: {a}")
#     obs, reward, isCompleted, infos = env.step(a)
#     print(f"Reward: {reward:.2f} | Score: {infos['score']:.2f} |  Completed? {isCompleted}\n\n")

In [5]:
# Flip to True to play a ScienceWorld task interactively from the keyboard.
play_scienceworld = False

if play_scienceworld:
    # Select the task by its index in `tasks` and load variation 1 with "easy" simplifications.
    task_idx = 8
    task_name = tasks[task_idx]
    print(f"Loading task #{task_idx}: {task_name}.")
    obs, info = load_task(
        env,
        task_name=task_name,
        var_num=1,
        simplifications="easy",
        generate_gold_path=True,
    )
    print(f"Task: {env.get_task_description()}")

    # `score` holds the second value returned by the latest env.step();
    # `reward` is the running sum of those values over the episode.
    score, moves, done, reward = 0, 0, False, 0

    # Read a command, step the environment, report, and repeat until it signals done.
    while not done:
        command = input("> ")
        obs, score, done, infos = env.step(command)
        reward += score
        moves += 1
        print(f"Observation:\n{obs}\n")
        print(f"Score: {score}; Total Reward: {reward}; Done: {done}\n####################\n\n")

    # Episode summary, then reset the environment.
    print(f"Moves: {moves}; Reward: {reward}\n")
    print(f"Gold Path:\n{env.get_gold_action_sequence()}")
    print(f"Goal Progress:\n{env.get_goal_progress()}")
    env.reset()

# TextWorld

In [23]:
# Helper function
def extract_initial_info(text: str) -> str:
    """Return the text that follows the last run of 6+ consecutive '$'.

    Used to drop the '$' ASCII-art banner in front of the initial observation.

    Args:
        text: Raw observation text, possibly starting with a '$' banner.

    Returns:
        Everything after the last run of six or more '$', with leading
        spaces, tabs, and newlines removed (trailing whitespace is kept).
        Returns "" when ``text`` contains no such run.
    """
    # One left-to-right pass over the '$' runs. The previous pattern used a
    # negative lookahead that re-scanned the rest of the text from every
    # candidate position, which is super-linear on a banner made of long '$' rows.
    last_end = -1
    for banner_run in re.finditer(r"\${6,}", text):
        last_end = banner_run.end()
    if last_end < 0:
        return ""
    # Trim only leading spaces/newlines after the art
    return text[last_end:].lstrip(" \t\r\n")

def print_obs(text: str) -> str:
    """Return ``text`` cut off at its last '>' character.

    Despite the name, nothing is printed; the cleaned string is returned.

    Args:
        text: Observation text, possibly containing one or more '>'.

    Returns:
        The part of ``text`` before the last '>', with trailing spaces,
        tabs, and newlines removed. If ``text`` has no '>', the whole text
        stripped of surrounding whitespace.
    """
    cut = text.rfind(">")
    if cut == -1:
        # No '>' anywhere: hand back the full text, trimmed on both sides.
        return text.strip()
    # Keep everything before the last '>' and trim only the right side.
    head = text[:cut]
    return head.rstrip(" \t\r\n")

In [6]:
!rm -f tw_games/*
# !tw-make custom --world-size 5 --nb-objects 10 --quest-length 5 --output tw_games/custom_game.z8
!tw-make tw-cooking --recipe 4 --take 2 --go 1 --open --cook --cut --drop --split train --output tw_games/custom_cooking.z8

Global seed: 27107
Game generated: /home/lucas/code/seq-decision/sequential-decision-processors/data/textworld/raw/tw_games/custom_cooking.z8


In [27]:
# Set to False to skip replaying the generated TextWorld game.
play_textworld = True

filename = "custom_cooking"
if play_textworld:
    # Load the game's metadata first, through a context manager, so the file handle
    # is closed deterministically and a missing file fails before an env is created.
    with open(f"tw_games/{filename}.json", encoding="utf-8") as metadata_file:
        json_info = json.load(metadata_file)['metadata']

    env_id = textworld.gym.register_game(f"tw_games/{filename}.z8", max_episode_steps=50)
    env = textworld.gym.make(env_id)

    # Print various game info
    # print(json_info['goal'])
    # print(json_info['walkthrough'])
    # print(json_info['max_score'])

    # try/finally guarantees the environment is closed even if a step raises.
    try:
        obs, infos = env.reset()
        print(f"Initial Observation:\n{print_obs(extract_initial_info(obs))}\n##################\n")

        score, moves, done = 0, 0, False
        # Replay the walkthrough from the metadata. The length bound stops the loop
        # cleanly (instead of raising IndexError) if the walkthrough is exhausted
        # before the environment reports done.
        while not done and moves < len(json_info['walkthrough']):
            # command = input("> ")
            command = json_info['walkthrough'][moves]
            if command == "stop":
                break
            obs, score, done, infos = env.step(command)
            print(f"Action:\n> {command}\n\nObservation:{print_obs(obs)}\n\nScore: {score}\n#################\n")
            moves += 1

        print(f"Moves: {moves}; Score: {score} / {json_info['max_score']}; Done: {done}")
        print(f"Gold Path:\n{json_info['walkthrough']}")
    finally:
        env.close()

Initial Observation:
You are hungry! Let's cook a delicious meal. Check the cookbook in the kitchen for the recipe. Once done, enjoy your meal!

-= Kitchen =-
You are in a kitchen. A typical kind of place.

You scan the room for a fridge, and you find a fridge. You make out a closed oven in the room. You see a table. On the table you see a cookbook and a knife. You make out a counter. The counter is vast. On the counter you make out a red apple and a red hot pepper. You see a gleam over in a corner, where you can see a stove. But the thing hasn't got anything on it.
##################

Action:
> inventory

Observation:
You are carrying: a white onion, a raw yellow potato, a red onion, a yellow bell pepper and a raw purple potato.

Score: 0
#################

Action:
> examine cookbook

Observation:
You open the copy of "Cooking: A Modern Approach (3rd Ed.)" and start reading:

Recipe #1
---------
Gather all following ingredients and follow the directions to prepare this tasty meal.

In