# More Gridworld

In [2]:
import random

In [17]:
class GridworldEnv():
    """A small grid world with walls, obstacles and a 10% chance of slipping.

    States are ``(row, col)`` tuples. The agent starts at ``(0, 0)`` and an
    episode ends when it reaches ``goal_state``.

    Args:
        grid_size: ``(rows, cols)`` of the grid.
        goal_state: ``(row, col)`` of the terminal goal cell.
        obstacles: iterable of ``(row, col)`` cells the agent cannot enter.
            It is copied, so neither the default nor a caller-owned sequence
            is shared between environments.
    """

    def __init__(self, grid_size=(5, 5), goal_state=(4, 4), obstacles=((2, 2), (3, 1), (1, 3))):
        self.grid_size = grid_size
        self.goal_state = goal_state
        # Own copy: a shared (default) list would let one instance's edits
        # leak into every other instance.
        self.obstacles = list(obstacles)
        self.actions = ['Up', 'Down', 'Left', 'Right']
        # Row/column delta applied by each action.
        self.action_moves = {
            'Up': (-1, 0),
            'Down': (1, 0),
            'Left': (0, -1),
            'Right': (0, 1)
        }
        # Defined up front so step()/render() work even before reset().
        self.state = (0, 0)

    def reset(self):
        """Put the agent back on ``(0, 0)`` and return that state."""
        self.state = (0, 0)
        return self.state

    def _in_bounds(self, cell):
        """Return True when ``cell`` lies inside the grid."""
        return 0 <= cell[0] < self.grid_size[0] and 0 <= cell[1] < self.grid_size[1]

    def step(self, action):
        """Apply ``action`` and return ``(next_state, reward, done)``.

        A move that would leave the grid or enter an obstacle leaves the
        agent where it is. With probability 0.1 a slip is attempted in one of
        the three other directions: an in-bounds slip replaces the intended
        move (or keeps the agent in place if it targets an obstacle), while an
        out-of-bounds slip is ignored and the intended outcome stands.

        Rewards: 10 on reaching the goal (episode ends), -1 otherwise. The
        -5 terminal branch only triggers if the agent is already standing on
        an obstacle cell (e.g. ``(0, 0)`` was listed as an obstacle).

        Raises:
            AssertionError: if ``action`` is not one of ``self.actions``.
        """
        assert action in self.actions, f"unknown action: {action!r}"
        intended_move = self.action_moves[action]
        row, col = self.state

        next_state = (row + intended_move[0], col + intended_move[1])
        if not self._in_bounds(next_state) or next_state in self.obstacles:
            # Blocked by a wall or an obstacle: stay put.
            next_state = self.state

        if random.random() > 0.9:
            # Same candidate order as action_moves, minus the intended move.
            possible_moves = [move for move in self.action_moves.values() if move != intended_move]
            slip_move = random.choice(possible_moves)
            slip_state = (row + slip_move[0], col + slip_move[1])
            if self._in_bounds(slip_state):
                next_state = slip_state if slip_state not in self.obstacles else self.state

        if next_state == self.goal_state:
            reward = 10
            done = True
        elif next_state in self.obstacles:
            reward = -5
            done = True
        else:
            reward = -1
            done = False

        self.state = next_state
        return next_state, reward, done

    def render(self):
        """Print the grid: ``.`` free, ``G`` goal, ``X`` obstacle, ``A`` agent.

        Each row is followed by a blank line. The agent marker is drawn last,
        so it overrides whatever symbol is on its cell.
        """
        grid = [['.' for _ in range(self.grid_size[1])] for _ in range(self.grid_size[0])]

        grid[self.goal_state[0]][self.goal_state[1]] = 'G'
        for obstacle in self.obstacles:
            grid[obstacle[0]][obstacle[1]] = 'X'

        agent_i, agent_j = self.state
        grid[agent_i][agent_j] = 'A'

        for row in grid:
            print(' '.join(row))
            print()

In [18]:
# Roll out a single episode with a uniformly random policy, drawing the
# grid before the first move and again after every step.
env = GridworldEnv()

state = env.reset()
env.render()

while True:
    action = random.choice(env.actions)
    next_state, reward, done = env.step(action)
    print(f"Action: {action}, Next State: {next_state}, Reward: {reward}")
    env.render()
    # Stop as soon as the environment reports a terminal state.
    if done:
        break

A . . . .

. . . X .

. . X . .

. X . . .

. . . . G

Action: Down, Next State: (1, 0), Reward: -1
. . . . .

A . . X .

. . X . .

. X . . .

. . . . G

Action: Down, Next State: (2, 0), Reward: -1
. . . . .

. . . X .

A . X . .

. X . . .

. . . . G

Action: Down, Next State: (3, 0), Reward: -1
. . . . .

. . . X .

. . X . .

A X . . .

. . . . G

Action: Left, Next State: (3, 0), Reward: -1
. . . . .

. . . X .

. . X . .

A X . . .

. . . . G

Action: Up, Next State: (2, 0), Reward: -1
. . . . .

. . . X .

A . X . .

. X . . .

. . . . G

Action: Up, Next State: (1, 0), Reward: -1
. . . . .

A . . X .

. . X . .

. X . . .

. . . . G

Action: Right, Next State: (1, 1), Reward: -1
. . . . .

. A . X .

. . X . .

. X . . .

. . . . G

Action: Up, Next State: (0, 1), Reward: -1
. A . . .

. . . X .

. . X . .

. X . . .

. . . . G

Action: Up, Next State: (0, 1), Reward: -1
. A . . .

. . . X .

. . X . .

. X . . .

. . . . G

Action: Down, Next State: (1, 1), Reward: -1
. . . .