In [None]:
import gym
from gym import spaces
import numpy as np

In [None]:
class GridWorldEnv(gym.Env):
    """Square grid world in which an agent walks from a start cell to a goal.

    Positions are ``(row, col)`` tuples: the first coordinate is changed by
    Up/Down and is also the first index into the rendered grid. The
    environment follows the classic Gym API used by this file: ``reset()``
    returns the observation and ``step()`` returns a 4-tuple
    ``(observation, reward, done, info)``.
    """

    def __init__(self, grid_size=10, blocked_cells=None, start_pos=(0, 0), target_pos=(9, 9)):
        """Create the environment.

        Args:
            grid_size: Number of rows and columns of the square grid.
            blocked_cells: Optional iterable of ``(row, col)`` pairs the agent
                cannot enter. Each pair may be any 2-element sequence
                (tuple, list, array row); it is stored as a tuple.
            start_pos: ``(row, col)`` the agent is placed on by ``reset()``.
            target_pos: ``(row, col)`` of the goal. Positions are not checked
                against ``grid_size``; note that the default ``(9, 9)`` is
                only inside the grid when ``grid_size >= 10``.
        """
        super().__init__()
        self.grid_size = grid_size

        # Store positions as tuples. step() always produces a tuple for the
        # agent position, and a tuple never compares equal to a list, so a
        # list-typed target would otherwise make the goal unreachable.
        self.start_pos = tuple(start_pos)
        self.target_pos = tuple(target_pos)

        # Actions: 0=Up, 1=Down, 2=Left, 3=Right
        self.action_space = spaces.Discrete(4)
        # Observations: Agent's position
        self.observation_space = spaces.Tuple((
            spaces.Discrete(grid_size),
            spaces.Discrete(grid_size)
        ))

        # Copy the blocked cells into a list of tuples:
        #  * tuples make the membership test in step() match (row, col) pairs
        #    even if the caller supplied lists or array rows;
        #  * tuples index a single grid cell in render(), whereas a list
        #    index would select whole rows;
        #  * copying avoids aliasing the caller's container.
        # An explicit None check is used because truth-testing a NumPy array
        # raises.
        if blocked_cells is None:
            self.blocked_cells = []
        else:
            self.blocked_cells = [tuple(cell) for cell in blocked_cells]
        self.reset()

    def reset(self):
        """Move the agent back to ``start_pos`` and return that position."""
        self.agent_pos = self.start_pos
        return self.agent_pos

    def step(self, action):
        """Apply one action and return ``(observation, reward, done, info)``.

        A move that would leave the grid or enter a blocked cell leaves the
        agent where it is. An action outside 0-3 also leaves the agent in
        place. The step penalty is still applied in all of these cases.

        Args:
            action: 0=Up (row - 1), 1=Down (row + 1), 2=Left (col - 1),
                3=Right (col + 1).

        Returns:
            A tuple ``(agent_pos, reward, done, info)`` where ``reward`` is
            ``1.0`` when the agent is on ``target_pos`` after the move and
            ``-0.01`` otherwise, ``done`` is True exactly when the agent is
            on ``target_pos``, and ``info`` is an empty dict.
        """
        row, col = self.agent_pos
        last_index = self.grid_size - 1

        if action == 0 and row > 0:  # Up
            row -= 1
        elif action == 1 and row < last_index:  # Down
            row += 1
        elif action == 2 and col > 0:  # Left
            col -= 1
        elif action == 3 and col < last_index:  # Right
            col += 1

        candidate = (row, col)
        if candidate not in self.blocked_cells:
            self.agent_pos = candidate

        done = self.agent_pos == self.target_pos
        # Small per-step penalty encourages faster completion. Both branches
        # are floats so the reward type does not depend on the outcome.
        reward = 1.0 if done else -0.01
        return self.agent_pos, reward, done, {}

    def render(self, mode='human'):
        """Print the grid to stdout.

        ``.`` is a free cell, ``#`` a blocked cell, ``G`` the goal and ``A``
        the agent. The agent is drawn last, so it hides the goal marker when
        standing on it. ``mode`` is accepted for Gym API compatibility and is
        not otherwise used.
        """
        grid = np.full((self.grid_size, self.grid_size), '.', dtype='<U1')
        for cell in self.blocked_cells:
            # tuple() guards against cells appended after construction as
            # lists, which NumPy would treat as a row selection.
            grid[tuple(cell)] = '#'
        grid[tuple(self.target_pos)] = 'G'  # Goal
        grid[tuple(self.agent_pos)] = 'A'  # Agent
        print('\n'.join(' '.join(row) for row in grid))
        print()