In [9]:
import gym
from gym import spaces
import numpy as np
import matplotlib.pyplot as plt

class LawnmowerEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    def __init__(self):
        self.states = [(i, j) for i in range(4) for j in range(4)]
        self.start_state = (0, 0)
        self.goal_state = (3, 3)
        self.action_space = spaces.Discrete(4)  # Up, Down, Right, Left
        self.observation_space = spaces.Discrete(len(self.states))
        self.rewards = np.array([-6, -5, 6, 5])
        self.grid = np.zeros((4, 4))
        self.grid[1, 1] = -1  # rock at (1, 1)
        self.grid[2, 2] = 1   # battery at (2, 2)
        self.reset()

    def reset(self):
        self.current_state = self.start_state
        return self.states.index(self.current_state)

    def step(self, action):
        if action == 0: # Up
            next_state = (self.current_state[0]-1, self.current_state[1])
        elif action == 1: # Down
            next_state = (self.current_state[0]+1, self.current_state[1])
        elif action == 2: # Right
            next_state = (self.current_state[0], self.current_state[1]+1)
        elif action == 3: # Left
            next_state = (self.current_state[0], self.current_state[1]-1)

        # Check if the next state is within the grid boundaries
        if next_state[0] < 0:
            next_state = (0, next_state[1])
        elif next_state[0] > 3:
            next_state = (3, next_state[1])
        elif next_state[1] < 0:
            next_state = (next_state[0], 0)
        elif next_state[1] > 3:
            next_state = (next_state[0], 3)

        reward = self.rewards[self.states.index(next_state)]

        self.current_state = next_state

        done = self.current_state == self.goal_state

        return self.states.index(self.current_state), reward, done, {}


    def render(self, mode='human'):
        plt.imshow(self.grid, cmap='jet')
        current_state_xy = (self.current_state[1], self.current_state[0])
        plt.scatter(current_state_xy[0], current_state_xy[1], color='r')
        plt.text(1, -0.5, "Rock -1, Battery +1")
        plt.axis('off')
        plt.show()


In [10]:
env = LawnmowerEnv()
state = env.reset()
for i in range(10):
    action = env.action_space.sample()
    next_state, reward, done, _ = env.step(action)
    env.render()
    print(f"Step {i}: state={next_state}, action={action}, reward={reward}")
    if done:
        break
env.close()


IndexError: index 4 is out of bounds for axis 0 with size 4