In [None]:
import gymnasium as gym
from gymnasium.envs.registration import register
import src.environments.monty_hall as mh

# Toggle: when True, the environment is wrapped in mh.MontyHallDiscreteWrapper
# after creation (see the `if discrete:` branch in the setup cell).
discrete = True

In [None]:
# Register the environment and create it in human-render mode.
register(
    id="MontyHall-v0",
    entry_point="src.environments:MontyHallEnv",
)
env = gym.make("MontyHall-v0", render_mode="human")

if discrete:
    # Our state space is 2D, like `[[4], [1], [0]]`: an array of each door and
    # its Door State value. Our Tabular Q Learning, however, works on a simpler
    # 1D Discrete space, which is also more efficient for contiguous access.
    env = mh.MontyHallDiscreteWrapper(env)

# Reset only AFTER the optional wrapper is applied, so the wrapper sees the
# first reset and `obs` is in the same observation space as every later
# observation returned by env.reset() / env.step().
obs, info = env.reset()

# The env has already created a renderer that keeps a pygame module reference
# internally; reuse it rather than importing pygame a second time.
# NOTE(review): this reaches into private attributes (`_renderer._pygame`) and
# will break if the env's internals change.
pygame = env.unwrapped._renderer._pygame
clock = pygame.time.Clock()
running = True

print("Monty Hall — press 1-9 to pick a door, r to reset, Esc/q to quit.")

# Interactive loop: translate keyboard input into env actions until the user
# quits. The env was reset during setup, so an episode is already in progress.
# `episode_over` blocks further step() calls once an episode has ended, since
# Gymnasium requires reset() before stepping again.
episode_over = False

try:
    while running:
        # Handle window / keyboard events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                break  # ignore anything still queued after a quit request

            if event.type != pygame.KEYDOWN:
                continue

            key = event.key
            if key in (pygame.K_ESCAPE, pygame.K_q):
                running = False
                break

            if key == pygame.K_r:
                obs, info = env.reset()
                episode_over = False
                continue

            # Number keys -> door indices 0-8 (supports up to 9 doors)
            if not (pygame.K_1 <= key <= pygame.K_9):
                continue
            door_idx = key - pygame.K_1
            if door_idx >= env.action_space.n:
                continue  # key maps to a door this env does not have

            if episode_over:
                # Stepping a finished episode is undefined; ask for a reset.
                print("Episode is over. Press 'r' to play again.")
                continue

            obs, reward, terminated, truncated, info = env.step(door_idx)
            print(f"[Debug] {obs}")
            if terminated or truncated:
                episode_over = True
                print(
                    f"Episode finished — reward={reward:.0f}. "
                    "Press 'r' to play again."
                )

        # Keep event loop responsive (30 fps cap – env handles its own drawing)
        clock.tick(30)
finally:
    # Always close the env, even if step()/reset() raised mid-loop.
    env.close()