In [5]:
import numpy as np

class GridWorldEnv:
    """A small deterministic grid world with a single goal cell.

    States are ``(row, col)`` tuples. Row 0 is the first row printed by
    ``render``, so ``UP`` decreases the row index and ``LEFT`` decreases the
    column index. A move that would leave the grid is clamped, which leaves
    the agent where it is.

    Args:
        grid_size: ``(rows, cols)`` dimensions of the grid.
        goal_state: ``(row, col)`` cell that ends the episode when entered.
        start_state: ``(row, col)`` cell the agent is placed on by ``reset``.
    """

    def __init__(self, grid_size=(5, 5), goal_state=(4, 4), start_state=(0, 0)):
        self.grid_size = grid_size
        self.goal_state = goal_state
        self.start_state = start_state
        self.current_state = start_state
        # The index into this list is the integer action code used by `step`.
        self.actions = ['UP', 'DOWN', 'LEFT', 'RIGHT']
        self.num_actions = len(self.actions)

    def reset(self):
        """Move the agent back to the start state and return that state."""
        self.current_state = self.start_state
        return self.current_state

    def step(self, action):
        """Apply one action and advance the environment.

        Args:
            action: Integer action code: 0 = UP, 1 = DOWN, 2 = LEFT,
                3 = RIGHT (the index of the name in ``self.actions``).

        Returns:
            A ``(next_state, reward, done)`` tuple. ``reward`` is ``1`` when
            the agent enters the goal cell and ``-0.1`` otherwise; ``done``
            is ``True`` exactly when ``next_state`` is the goal cell.

        Raises:
            ValueError: If ``action`` is not one of the four action codes.
                The environment state is left untouched in that case.
        """
        x, y = self.current_state
        last_row = self.grid_size[0] - 1
        last_col = self.grid_size[1] - 1

        if action == 0:  # UP
            x = max(0, x - 1)
        elif action == 1:  # DOWN
            x = min(last_row, x + 1)
        elif action == 2:  # LEFT
            y = max(0, y - 1)
        elif action == 3:  # RIGHT
            y = min(last_col, y + 1)
        else:
            # An unknown action used to fall through as a silent "stay put"
            # move that still cost -0.1; fail loudly instead so caller bugs
            # (e.g. passing 'UP' rather than 0) surface immediately.
            raise ValueError(
                f"Invalid action {action!r}; expected an integer in "
                f"[0, {self.num_actions - 1}]"
            )

        next_state = (x, y)
        done = next_state == self.goal_state
        reward = 1 if done else -0.1
        self.current_state = next_state
        return next_state, reward, done

    def sample_action(self):
        """Return a uniformly random action code drawn with ``np.random.choice``."""
        return np.random.choice(self.num_actions)

    def render(self):
        """Print the grid with ``A`` for the agent and ``G`` for the goal.

        The goal marker is written last, so when the agent stands on the goal
        cell only ``G`` is shown. A blank line is printed after the grid.
        """
        rows, cols = self.grid_size[0], self.grid_size[1]
        grid = [['.' for _ in range(cols)] for _ in range(rows)]
        x, y = self.current_state
        grid[x][y] = 'A'
        gx, gy = self.goal_state
        grid[gx][gy] = 'G'
        print("\n".join(" ".join(row) for row in grid))
        print()


# Demo: drive the environment with random actions until the goal is reached.
if __name__ == "__main__":
    env = GridWorldEnv()
    state = env.reset()

    print("Initial Environment:")
    env.render()

    # At least one step is always taken; the loop exits on the step whose
    # result reports done=True (i.e. the agent entered the goal cell).
    done = False
    while True:
        action = env.sample_action()
        next_state, reward, done = env.step(action)
        print(f"Action Taken: {env.actions[action]} | Reward: {reward} | Next State: {next_state}")
        env.render()
        if done:
            break


Initial Environment:
A . . . .
. . . . .
. . . . .
. . . . .
. . . . G

Action Taken: RIGHT | Reward: -0.1 | Next State: (0, 1)
. A . . .
. . . . .
. . . . .
. . . . .
. . . . G

Action Taken: RIGHT | Reward: -0.1 | Next State: (0, 2)
. . A . .
. . . . .
. . . . .
. . . . .
. . . . G

Action Taken: RIGHT | Reward: -0.1 | Next State: (0, 3)
. . . A .
. . . . .
. . . . .
. . . . .
. . . . G

Action Taken: UP | Reward: -0.1 | Next State: (0, 3)
. . . A .
. . . . .
. . . . .
. . . . .
. . . . G

Action Taken: RIGHT | Reward: -0.1 | Next State: (0, 4)
. . . . A
. . . . .
. . . . .
. . . . .
. . . . G

Action Taken: UP | Reward: -0.1 | Next State: (0, 4)
. . . . A
. . . . .
. . . . .
. . . . .
. . . . G

Action Taken: RIGHT | Reward: -0.1 | Next State: (0, 4)
. . . . A
. . . . .
. . . . .
. . . . .
. . . . G

Action Taken: UP | Reward: -0.1 | Next State: (0, 4)
. . . . A
. . . . .
. . . . .
. . . . .
. . . . G

Action Taken: RIGHT | Reward: -0.1 | Next State: (0, 4)
. . . . A
. . . . .
. . .