In [1]:
import gymnasium as gym
import numpy as np
from gymnasium import spaces

# Constants
GRID_SIZE = 5
CELL_SIZE = 100
FPS = 4

# Colors
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)

# Actions
LEFT = 0
RIGHT = 1
UP = 2
DOWN = 3

# Phase
PHASE_GO = 'go'
PHASE_RETURN = 'return'

# Rewards
STEP_REWARD = -1
GOAL_REWARD = 10
RETURN_REWARD = 20

In [2]:
class HomeMazeEnv(gym.Env):
    """Grid-world "homing" task: walk to the goal corner, then back to the start.

    The agent starts on cell ``(0, 0)`` in phase ``PHASE_GO``. Reaching
    ``(grid_size - 1, grid_size - 1)`` switches the phase to ``PHASE_RETURN``;
    reaching the start cell again while in that phase terminates the episode.

    Observation:
        The agent's ``(x, y)`` cell as an ``int32`` array of shape ``(2,)``.
        NOTE: the current phase is not included in the observation.
    Actions:
        ``LEFT``, ``RIGHT``, ``UP``, ``DOWN``. A move that would leave the grid
        keeps the agent on the border cell. Any other value leaves the
        position unchanged (the step is still counted and rewarded).
    Rewards:
        ``STEP_REWARD`` on an ordinary step, ``GOAL_REWARD`` on the step that
        reaches the goal during ``PHASE_GO``, ``RETURN_REWARD`` on the step
        that reaches the start during ``PHASE_RETURN``.

    Args:
        grid_size: Number of cells per side of the square grid.
        max_steps: Optional episode length limit. When set, ``step`` reports
            ``truncated=True`` once this many steps have been taken without
            the episode terminating. ``None`` (default) means no limit.

    Raises:
        ValueError: If ``max_steps`` is given and is smaller than 1.
    """

    def __init__(self, grid_size=GRID_SIZE, max_steps=None):
        super().__init__()
        if max_steps is not None and max_steps < 1:
            raise ValueError(f"max_steps must be >= 1 or None, got {max_steps!r}")

        self.grid_size = grid_size
        self.max_steps = max_steps
        self.start_pos = (0, 0)
        self.goal_pos = (grid_size - 1, grid_size - 1)
        self.current_pos = self.start_pos
        self.phase = PHASE_GO
        self._elapsed_steps = 0

        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(
            low=np.array([0, 0], dtype=np.int32),
            high=np.array([grid_size - 1, grid_size - 1], dtype=np.int32),
            dtype=np.int32)

    def _get_obs(self):
        """Return the current position as an ``int32`` array."""
        return np.array(self.current_pos, dtype=np.int32)

    def reset(self, *, seed=None, options=None):
        """Put the agent back on the start cell in ``PHASE_GO``.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_pos = self.start_pos
        self.phase = PHASE_GO
        self._elapsed_steps = 0
        return self._get_obs(), {}

    def step(self, action):
        """Apply one move and advance the phase logic.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` is True only when the start cell is reached in
            ``PHASE_RETURN``; ``truncated`` is True only when ``max_steps`` is
            set, has been reached, and the episode did not terminate.
        """
        x, y = self.current_pos
        # Comparisons (rather than a dict lookup) so that numpy scalar or
        # 0-d array actions are accepted as well as plain ints.
        if action == LEFT:
            x = max(0, x - 1)
        elif action == RIGHT:
            x = min(self.grid_size - 1, x + 1)
        elif action == UP:
            y = max(0, y - 1)
        elif action == DOWN:
            y = min(self.grid_size - 1, y + 1)
        self.current_pos = (x, y)
        self._elapsed_steps += 1

        terminated = False
        reward = STEP_REWARD

        if self.phase == PHASE_GO and self.current_pos == self.goal_pos:
            self.phase = PHASE_RETURN
            reward = GOAL_REWARD
        elif self.phase == PHASE_RETURN and self.current_pos == self.start_pos:
            reward = RETURN_REWARD
            terminated = True

        # A natural termination takes precedence over the step limit.
        truncated = (
            not terminated
            and self.max_steps is not None
            and self._elapsed_steps >= self.max_steps
        )

        return self._get_obs(), reward, terminated, truncated, {}

    def render(self):
        """No-op: this environment does not draw anything."""
        pass

    def close(self):
        """No-op: this environment holds no resources to release."""
        pass


In [3]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
import numpy as np
import os

# Create the directory for saved models (no error if it already exists)
models_dir = "models"
os.makedirs(models_dir, exist_ok=True)

# Training configuration
SEED = 0                    # seed handed to PPO to make runs repeatable
TOTAL_TIMESTEPS = 200000    # number of environment steps to train for

env = HomeMazeEnv(grid_size=GRID_SIZE)
check_env(env)

model = PPO("MlpPolicy", env, verbose=1, seed=SEED)
model.learn(total_timesteps=TOTAL_TIMESTEPS)

# Save the trained model
model_path = os.path.join(models_dir, "ppo_maze")
model.save(model_path)
print(f"Model saved to {model_path}.zip")

# Optional: check that the saved model can be loaded back
loaded_model = PPO.load(model_path)
print("Model loaded successfully!")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 289      |
|    ep_rew_mean     | -257     |
| time/              |          |
|    fps             | 9879     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 252        |
|    ep_rew_mean          | -220       |
| time/                   |            |
|    fps                  | 5955       |
|    iterations           | 2          |
|    time_elapsed         | 0          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.01173005 |
|    clip_fraction        | 0.0912     |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.38

In [2]:
import pygame

class HomeMazeGameEnv:
    """Pygame window that replays the home-maze task one move at a time.

    The agent (blue circle) starts on the green start cell ``(0, 0)``, has to
    reach the red goal cell ``(grid_size - 1, grid_size - 1)`` and then walk
    back to the start. Every ``reset``/``step`` redraws the window.

    Args:
        grid_size: Number of cells per side of the square grid.
        cell_size: Side length of one cell in pixels; the window is
            ``grid_size * cell_size`` pixels square.
    """

    def __init__(self, grid_size=GRID_SIZE, cell_size=CELL_SIZE):
        self.grid_size = grid_size
        self.cell_size = cell_size
        self.window_size = grid_size * cell_size

        self.start_pos = (0, 0)
        self.goal_pos = (grid_size - 1, grid_size - 1)
        self.current_pos = self.start_pos
        self.phase = PHASE_GO

        pygame.init()
        pygame.display.init()
        self.window = pygame.display.set_mode((self.window_size, self.window_size))
        self.clock = pygame.time.Clock()

    def _create_grid_surface(self):
        """Return a fresh white surface with the black grid lines drawn on it."""
        surface = pygame.Surface((self.window_size, self.window_size))
        surface.fill(WHITE)

        # grid_size + 1 lines per direction so the outer border is drawn too
        for i in range(self.grid_size + 1):
            offset = i * self.cell_size
            pygame.draw.line(surface, BLACK, (offset, 0),
                             (offset, self.window_size), 2)
            pygame.draw.line(surface, BLACK, (0, offset),
                             (self.window_size, offset), 2)
        return surface

    def _draw_position(self, surface, pos, color, is_circle=False):
        """Mark grid cell ``pos`` on ``surface``.

        Args:
            surface: Surface to draw on.
            pos: ``(x, y)`` cell coordinates.
            color: RGB triple.
            is_circle: Draw a circle centred in the cell (radius one third of
                the cell size) instead of filling the whole cell.
        """
        x, y = pos
        if is_circle:
            pygame.draw.circle(
                surface,
                color,
                (x * self.cell_size + self.cell_size // 2,
                 y * self.cell_size + self.cell_size // 2),
                self.cell_size // 3
            )
        else:
            pygame.draw.rect(
                surface,
                color,
                pygame.Rect(
                    x * self.cell_size,
                    y * self.cell_size,
                    self.cell_size,
                    self.cell_size
                )
            )

    def reset(self):
        """Move the agent back to the start cell, restart the phase and redraw."""
        self.current_pos = self.start_pos
        self.phase = PHASE_GO
        self._render_frame()

    def step(self, action):
        """Apply one move, update the phase and redraw.

        Args:
            action: One of ``LEFT``, ``RIGHT``, ``UP``, ``DOWN``. Moves that
                would leave the grid keep the agent on the border cell; any
                other value leaves the position unchanged.

        Returns:
            True once the agent is back on the start cell after having visited
            the goal, otherwise False.
        """
        x, y = self.current_pos
        # Comparisons (rather than a dict lookup) so that numpy scalar or
        # 0-d array actions are accepted as well as plain ints.
        if action == LEFT:
            x = max(0, x - 1)
        elif action == RIGHT:
            x = min(self.grid_size - 1, x + 1)
        elif action == UP:
            y = max(0, y - 1)
        elif action == DOWN:
            y = min(self.grid_size - 1, y + 1)
        self.current_pos = (x, y)

        done = False

        if self.phase == PHASE_GO and self.current_pos == self.goal_pos:
            self.phase = PHASE_RETURN
        elif self.phase == PHASE_RETURN and self.current_pos == self.start_pos:
            done = True

        self._render_frame()
        return done

    def _render_frame(self):
        """Draw grid, start, goal and agent, then present the frame.

        Raises:
            RuntimeError: If the window has already been closed.
        """
        if self.window is None:
            raise RuntimeError("HomeMazeGameEnv has been closed; cannot render")

        # Create base grid
        canvas = self._create_grid_surface()

        # Agent is drawn last so it stays visible on top of start/goal cells
        self._draw_position(canvas, self.start_pos, GREEN)
        self._draw_position(canvas, self.goal_pos, RED)
        self._draw_position(canvas, self.current_pos, BLUE, is_circle=True)

        self.window.blit(canvas, canvas.get_rect())
        pygame.event.pump()
        pygame.display.flip()
        # Limit the redraw rate to FPS frames per second
        self.clock.tick(FPS)

    def close(self):
        """Shut pygame down. Safe to call more than once."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            # Clear the handle so a second close() is a no-op
            self.window = None


In [3]:
from stable_baselines3 import PPO
import numpy as np

# Upper bound on the number of moves the agent gets in the replay
MAX_EPISODE_STEPS = 20

env = HomeMazeGameEnv()
try:
    model = PPO.load("models/ppo_maze")

    env.reset()
    for _ in range(MAX_EPISODE_STEPS):
        # The policy is fed the agent's current (x, y) cell as an int32 array
        obs = np.array(env.current_pos, dtype=np.int32)
        action, _states = model.predict(obs)
        terminated = env.step(action)

        if terminated:
            # Agent made it back to the start after visiting the goal
            print("귀소 성공!")
            break
    else:
        # Step budget exhausted without completing the round trip
        print("귀소 실패!")
finally:
    # Always release the pygame window, even if loading or stepping fails
    env.close()

2025-04-13 14:08:57.129 python[39800:31742990] +[IMKClient subclass]: chose IMKClient_Modern
2025-04-13 14:08:57.129 python[39800:31742990] +[IMKInputSession subclass]: chose IMKInputSession_Modern


귀소 성공!
