In [2]:
import random
import numpy as np

class GridWorld:
    """Square grid environment with a single agent and a single goal cell.

    Positions are ``[row, col]`` lists. The agent starts in the top-left
    cell ``[0, 0]``; the goal is the bottom-right cell
    ``[size - 1, size - 1]``.
    """

    def __init__(self, size=3):
        """Create a ``size`` x ``size`` grid with the agent at the start cell.

        Args:
            size: Number of rows and columns of the grid.

        Raises:
            ValueError: If ``size`` is smaller than 1.
        """
        if size < 1:
            raise ValueError(f"size must be at least 1, got {size}")
        self.size = size
        self.agent_position = [0, 0]
        # Derive the goal from the grid size instead of hard-coding [2, 2]:
        # a fixed goal is unreachable on grids smaller than 3x3 and is not
        # the corner cell on larger ones.
        self.goal_position = [size - 1, size - 1]

    def reset(self):
        """Move the agent back to ``[0, 0]`` and return that position."""
        self.agent_position = [0, 0]
        # Hand out a copy so callers cannot mutate the internal state.
        return list(self.agent_position)

    def step(self, action):
        """Apply one move and report the outcome.

        Args:
            action: One of ``'up'``, ``'down'``, ``'left'``, ``'right'``.
                Any other value leaves the agent where it is, but the
                step is still rewarded like any other.

        Returns:
            A ``(position, reward, done)`` tuple. ``reward`` is ``1`` and
            ``done`` is ``True`` when the agent stands on the goal after
            the move; otherwise ``reward`` is ``-1`` and ``done`` is
            ``False``.
        """
        row, col = self.agent_position

        # Moves are clamped at the grid edges, so walking into a wall
        # keeps the agent in place.
        if action == 'up':
            row = max(0, row - 1)
        elif action == 'down':
            row = min(self.size - 1, row + 1)
        elif action == 'left':
            col = max(0, col - 1)
        elif action == 'right':
            col = min(self.size - 1, col + 1)

        self.agent_position = [row, col]

        reached_goal = self.agent_position == self.goal_position
        reward = 1 if reached_goal else -1

        # Return a copy so callers cannot mutate the internal state.
        return list(self.agent_position), reward, reached_goal

    def render(self):
        """Print the grid: ``A`` for the agent, ``G`` for the goal, ``.`` otherwise.

        Every cell is followed by a single space, every row by a newline,
        and the whole grid by one blank line. The agent marker takes
        precedence when the agent is on the goal cell.
        """
        for i in range(self.size):
            for j in range(self.size):
                if [i, j] == self.agent_position:
                    print("A", end=" ")
                elif [i, j] == self.goal_position:
                    print("G", end=" ")
                else:
                    print(".", end=" ")
            print()
        print()

class RandomAgent:
    """Policy that ignores the environment and picks actions uniformly."""

    def __init__(self, actions) -> None:
        """Remember the sequence of actions the agent may choose from."""
        self.actions = actions

    def select_action(self):
        """Return one entry of ``self.actions`` drawn with ``random.choice``."""
        candidates = self.actions
        return random.choice(candidates)

def main():
    """Run one random-policy episode on a 3x3 grid, printing every step.

    The grid is rendered once before the first move and again after each
    move, together with the chosen action and the reward received. The
    loop ends as soon as the environment reports the episode as done.
    """
    grid_size = 3
    world = GridWorld(size=grid_size)
    walker = RandomAgent(actions=['up', 'down', 'left', 'right'])

    # The starting observation is not needed; the reset is done for its
    # effect on the environment.
    world.reset()
    print("Initial State:")
    world.render()

    step_number = 0
    finished = False
    while not finished:
        step_number += 1
        move = walker.select_action()
        print(f"Step {step_number}:")
        print(f"Action: {move}")

        _, reward, finished = world.step(move)
        world.render()
        print(f"Reward: {reward}")

        if finished:
            print("Goal Reached!")

# Run the demo episode only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Initial State:
A . . 
. . . 
. . G 

Step 1:
Action: right
. A . 
. . . 
. . G 

Reward: -1
Step 2:
Action: down
. . . 
. A . 
. . G 

Reward: -1
Step 3:
Action: down
. . . 
. . . 
. A G 

Reward: -1
Step 4:
Action: down
. . . 
. . . 
. A G 

Reward: -1
Step 5:
Action: down
. . . 
. . . 
. A G 

Reward: -1
Step 6:
Action: up
. . . 
. A . 
. . G 

Reward: -1
Step 7:
Action: left
. . . 
A . . 
. . G 

Reward: -1
Step 8:
Action: left
. . . 
A . . 
. . G 

Reward: -1
Step 9:
Action: down
. . . 
. . . 
A . G 

Reward: -1
Step 10:
Action: down
. . . 
. . . 
A . G 

Reward: -1
Step 11:
Action: left
. . . 
. . . 
A . G 

Reward: -1
Step 12:
Action: right
. . . 
. . . 
. A G 

Reward: -1
Step 13:
Action: left
. . . 
. . . 
A . G 

Reward: -1
Step 14:
Action: down
. . . 
. . . 
A . G 

Reward: -1
Step 15:
Action: down
. . . 
. . . 
A . G 

Reward: -1
Step 16:
Action: down
. . . 
. . . 
A . G 

Reward: -1
Step 17:
Action: right
. . . 
. . . 
. A G 

Reward: -1
Step 18:
Action: left
. . . 
. . . 
