In [85]:
import gymnasium as gym
import nle
from nle.nethack import actions as nh_actions
from nle.nethack.actions import ACTIONS
import numpy as np
from collections import deque

print("ACTIONS:", ACTIONS)
print("nh_actions:", nh_actions)



# Compass action for each (dx, dy) step. y grows southward, so (0, -1) is
# north. Insertion order matters: it is the order in which the search code
# and the fallback list below try the eight directions.
direction_to_action = dict(
    [
        ((0, -1), nh_actions.CompassDirection.N),
        ((1, 0), nh_actions.CompassDirection.E),
        ((0, 1), nh_actions.CompassDirection.S),
        ((-1, 0), nh_actions.CompassDirection.W),
        ((1, -1), nh_actions.CompassDirection.NE),
        ((1, 1), nh_actions.CompassDirection.SE),
        ((-1, 1), nh_actions.CompassDirection.SW),
        ((-1, -1), nh_actions.CompassDirection.NW),
    ]
)

# Directions to probe, in order, when no path-based move is available
fallback_dirs = [direction_to_action[offset] for offset in direction_to_action]



: 

In [86]:
def get_agent_position(obs):
    """Return the agent's map position as ``(x, y)``, i.e. ``(column, row)``.

    NLE's ``blstats`` vector stores the x coordinate at index 0 and the y
    coordinate at index 1. The result is meant to be used as
    ``obs["chars"][y, x]`` and compared against the ``(x, y)`` tuples the
    other helpers return.

    Args:
        obs: Observation mapping containing a ``"blstats"`` sequence.

    Returns:
        tuple[int, int]: ``(x, y)`` as plain Python ints.
    """
    blstats = obs["blstats"]
    return int(blstats[0]), int(blstats[1])  # x, y

In [87]:
def is_walkable(char_code, glyph_code):
    """Decide whether a map tile can be stepped on.

    A tile is walkable when its character is one of the floor-like symbols
    (floor, corridor, stairs, doors) and its glyph is neither a monster nor a
    wall segment.

    The glyph ranges below follow the glyph table of the NetHack 3.6 build
    that NLE ships (map features start at 2359, so a blanket ``< 4000`` test
    would reject every floor tile). If a different NetHack build is used,
    re-derive them from ``nle.nethack``'s ``GLYPH_*_OFF`` constants.

    Args:
        char_code: Character code of the tile (``obs["chars"][y, x]``).
        glyph_code: Glyph id of the tile (``obs["glyphs"][y, x]``).

    Returns:
        bool: True if the tile may be entered.
    """
    walkable_chars = (ord('.'), ord('#'), ord('>'), ord('<'), ord('+'), ord('-'))  # floor, corridor, stairs, doors
    if char_code not in walkable_chars:
        return False

    # Monsters, pets, the "remembered invisible" marker and detected monsters
    # occupy the glyph ids below the corpse range.
    monster_end = 1144
    # Ridden monsters sit in their own range just before object glyphs.
    ridden_start, ridden_end = 1525, 1906
    # Wall segments (vertical/horizontal walls, corners, T-pieces). Horizontal
    # walls are drawn as '-', the same character as an open door, so the glyph
    # is the only way to tell them apart.
    wall_first, wall_last = 2360, 2370

    if 0 <= glyph_code < monster_end:
        return False
    if ridden_start <= glyph_code < ridden_end:
        return False
    if wall_first <= glyph_code <= wall_last:
        return False
    return True

In [88]:
def find_stairs(obs):
    """Locate the first '>' character on the map.

    The map is scanned in row-major order, so the topmost (then leftmost)
    match wins.

    Returns:
        ``(x, y)`` of the match, or None when no '>' is displayed.
    """
    matches = np.argwhere(obs["chars"] == ord('>'))
    if len(matches) == 0:
        return None
    row, col = matches[0]
    return (col, row)

In [89]:
def is_hungry(message: str) -> bool:
    """Return True when the game message contains one of the hunger phrases.

    Matching is a case-sensitive substring test.
    """
    for phrase in ("Fainting", "hungry", "You are beginning to feel weak"):
        if phrase in message:
            return True
    return False

In [90]:
def find_food_letter(obs):
    """Return the inventory letter of the first food item, or None.

    ``obs["inv_oclasses"]`` holds NetHack object-class ids, one per inventory
    slot, aligned with ``obs["inv_letters"]``. In NetHack 3.6 the food class
    is 7 (5 is the amulet class).

    Args:
        obs: Observation mapping with ``"inv_letters"`` and ``"inv_oclasses"``.

    Returns:
        str | None: One-character inventory letter, or None if no food is carried.
    """
    food_class = 7  # FOOD_CLASS in NetHack 3.6's object-class numbering
    for letter, oclass in zip(obs["inv_letters"], obs["inv_oclasses"]):
        if oclass == food_class:
            return chr(letter)
    return None

In [91]:
def find_food_positions(obs):
    """Locate the first '%' character on the map.

    Despite the plural name, only a single position is returned: the first
    match in row-major order.

    Returns:
        ``(x, y)`` of the match, or None when no '%' is displayed.
    """
    matches = np.argwhere(obs["chars"] == ord('%'))
    if len(matches) == 0:
        return None
    row, col = matches[0]
    return (col, row)


In [92]:
def is_food_at_agent(obs):
    """Report whether the map character on the agent's own tile is '%'.

    NOTE(review): the agent's own symbol is presumably drawn on this tile,
    which would hide any '%' underneath it — confirm this can ever be True.
    """
    col, row = get_agent_position(obs)
    tile_char = chr(obs["chars"][row, col])
    return tile_char == '%'


In [93]:
def bfs(start, goal, chars, glyphs):
    """Breadth-first search for a shortest 8-connected path from start to goal.

    Intermediate tiles must pass ``is_walkable``. The goal tile itself is
    always accepted: targets such as a '%' food item are not in the walkable
    character set, and requiring them to be walkable would make them
    unreachable.

    Args:
        start: ``(x, y)`` of the starting tile.
        goal: ``(x, y)`` of the target tile.
        chars: 2-D array of tile characters, indexed ``[y, x]``.
        glyphs: 2-D array of tile glyph ids, indexed ``[y, x]``.

    Returns:
        list[tuple[int, int]] | None: The ``(dx, dy)`` steps leading from
        start to goal (empty when start is the goal), or None if no path
        exists.
    """
    height, width = chars.shape[0], chars.shape[1]
    # Seed with the start so the search never walks back onto it.
    visited = {tuple(start)}
    queue = deque([(start, [])])
    while queue:
        (x, y), path = queue.popleft()
        if (x, y) == goal:
            return path
        for dx, dy in direction_to_action:
            nx, ny = x + dx, y + dy
            if not (0 <= ny < height and 0 <= nx < width):
                continue
            if (nx, ny) in visited:
                continue
            if (nx, ny) == goal or is_walkable(chars[ny, nx], glyphs[ny, nx]):
                visited.add((nx, ny))
                queue.append(((nx, ny), path + [(dx, dy)]))
    return None


In [94]:
def find_unseen(pos, seen_map, chars, glyphs):
    """Find the nearest reachable walkable tile that is not flagged in seen_map.

    Runs a breadth-first flood over walkable tiles (8-connected) starting at
    ``pos`` and returns the first walkable tile whose ``seen_map`` entry is
    False. The start tile is never returned as a target.

    Args:
        pos: ``(x, y)`` to search from.
        seen_map: 2-D boolean array indexed ``[y, x]``; True marks tiles that
            need no further exploration.
        chars: 2-D array of tile characters, indexed ``[y, x]``.
        glyphs: 2-D array of tile glyph ids, indexed ``[y, x]``.

    Returns:
        tuple[int, int] | None: ``(x, y)`` of the target, or None if every
        reachable walkable tile is already flagged.
    """
    height, width = chars.shape[0], chars.shape[1]
    origin = tuple(pos)
    visited = {origin}
    queue = deque([origin])
    while queue:
        x, y = queue.popleft()
        for dx, dy in direction_to_action:
            nx, ny = x + dx, y + dy
            if not (0 <= ny < height and 0 <= nx < width):
                continue
            if (nx, ny) in visited:
                continue
            visited.add((nx, ny))
            if not is_walkable(chars[ny, nx], glyphs[ny, nx]):
                continue
            if not seen_map[ny, nx]:
                return (nx, ny)
            queue.append((nx, ny))
    return None

In [None]:
# --- Run agent ---
def to_env_action(env_actions, action):
    """Translate an action enum or an inventory letter into an index for env.step().

    env.step() takes a position in the environment's own action tuple, which
    need not match the position in nle's global ACTIONS tuple. Inventory
    letters are looked up by key code: nle's action enums are IntEnums whose
    values are the key codes they send, so ``index(ord(letter))`` finds the
    entry that emits that key.

    Falls back to WAIT (with a warning) when the environment does not offer
    the requested key.
    """
    key = ord(action) if isinstance(action, str) else action
    try:
        return env_actions.index(key)
    except ValueError:
        print(f"⚠️ Action {action!r} is not in this env's action set — waiting instead.")
        return env_actions.index(nh_actions.MiscDirection.WAIT)


def first_step_toward(pos, target, obs):
    """Return the compass action that starts the BFS path from pos to target, or None."""
    path = bfs(pos, target, obs["chars"], obs["glyphs"])
    if not path:
        return None
    return direction_to_action.get(path[0])


# NetHackScore-v0 defaults to a reduced action set without PICKUP or most
# inventory letters, so request nle's full ACTIONS tuple explicitly.
env = gym.make("NetHackScore-v0", actions=ACTIONS, render_mode="human")
env_actions = env.unwrapped.actions

items_collected = 0
descended = False
step = 0
terminated = False
truncated = False
hungry = False        # sticky: hunger messages are only shown for one turn
eating = False        # True when EAT was sent and the item letter is still owed
eat_letter = None
food_target = None    # ground food the agent is walking to
stairs_pos = None     # last known '>' on the current level
current_level = None

try:
    obs, _ = env.reset()
    # Tiles the agent has stood on. Marking every *displayed* tile instead
    # would flag all walkable tiles before find_unseen runs, leaving it
    # nothing to return.
    visited_map = np.zeros_like(obs["chars"], dtype=bool)

    while True:
        env.render()
        step += 1
        pos = get_agent_position(obs)
        level = int(obs["blstats"][12])
        message = "".join(chr(x) for x in obs["message"] if x != 0)

        # Per-level memory is meaningless after changing levels.
        if level != current_level:
            current_level = level
            visited_map[:] = False
            stairs_pos = None
            food_target = None

        px, py = pos
        if 0 <= py < visited_map.shape[0] and 0 <= px < visited_map.shape[1]:
            visited_map[py, px] = True

        if is_hungry(message):
            hungry = True

        # 🎯 GOAL TRACKING
        if "You pick up" in message:
            items_collected += 1
            print(f"📦 Picked up item! Total: {items_collected}")
        if level >= 2:
            descended = True
        if items_collected >= 5 and descended:
            print(f"🎉 SUCCESS: Collected {items_collected} items and reached level {level} in {step} steps!")
            break

        # 💀 Game over check
        if terminated or truncated:
            print("💀 Agent died or game ended before reaching both goals.")
            break

        action_enum = None

        # 🍽️ Eating logic
        if eating:
            # EAT was sent last step; the game is now asking which item. This
            # must not depend on `hungry`: the prompt has replaced the hunger
            # message by now.
            print(f"👄 Eating food item '{eat_letter}'")
            action_enum = eat_letter
            eating = False
            eat_letter = None
            hungry = False
        elif hungry:
            eat_letter = find_food_letter(obs)
            if eat_letter:
                print(f"🍽️ Found food in inventory: {eat_letter}")
                action_enum = nh_actions.Command.EAT
                eating = True
                food_target = None
            elif food_target is not None and pos == food_target:
                # The agent's own symbol hides the '%' it stands on, so arrival
                # is detected via the remembered target, not via the map.
                print("🤲 Standing on food — issuing PICKUP")
                action_enum = nh_actions.Command.PICKUP
                food_target = None
            else:
                food_loc = find_food_positions(obs)
                if food_loc:
                    print(f"🔍 Moving to ground food at {food_loc}")
                    action_enum = first_step_toward(pos, food_loc, obs)
                    food_target = food_loc if action_enum is not None else None
                else:
                    print("😵 Hungry, no food found.")
                    food_target = None
            # With no food action available, fall through and keep exploring
            # rather than waiting in place.

        # 🧠 Standard logic: head for the stairs
        if action_enum is None:
            visible_stairs = find_stairs(obs)
            if visible_stairs is not None:
                stairs_pos = visible_stairs
            if stairs_pos is not None:
                # The stairs are remembered because '>' is hidden by the
                # agent's symbol once the agent stands on it.
                if pos == stairs_pos:
                    action_enum = nh_actions.MiscDirection.DOWN
                else:
                    action_enum = first_step_toward(pos, stairs_pos, obs)

        # Otherwise explore toward the nearest tile not yet visited
        if action_enum is None:
            unseen = find_unseen(pos, visited_map, obs["chars"], obs["glyphs"])
            if unseen is not None:
                action_enum = first_step_toward(pos, unseen, obs)

        # Last resort: probe each direction for real
        if action_enum is None:
            probe_finished_step = False
            for fallback in fallback_dirs:
                obs, _, terminated, truncated, _ = env.step(to_env_action(env_actions, fallback))
                if terminated or truncated or get_agent_position(obs) != pos:
                    print(f"Step: {step}, Items: {items_collected}, Level: {level}, Action: {fallback}")
                    probe_finished_step = True
                    break
            if probe_finished_step:
                # The probe already was this iteration's step; replaying the
                # action below would move twice and lose the episode flags.
                continue
            action_enum = nh_actions.MiscDirection.WAIT

        # 🚀 Execute
        print(f"Step: {step}, Items: {items_collected}, Level: {level}, Action: {action_enum}")
        obs, _, terminated, truncated, _ = env.step(to_env_action(env_actions, action_enum))
finally:
    # Release the environment even if the loop raises.
    env.close()

