**GridWorld Environment**

In [9]:
import numpy as np

class GridWorld:
    """Square grid in which an agent walks from the top-left to the bottom-right cell.

    Positions are ``[row, col]`` lists: the first coordinate selects the line
    printed by :meth:`render`, the second the column within that line.

    Attributes are plain lists so existing comparisons such as
    ``env.agent_position == [i, j]`` keep working.
    """

    def __init__(self, size=3):
        """Create a ``size`` x ``size`` grid with the agent at ``[0, 0]``.

        Args:
            size: Number of rows and columns. Must be at least 1.

        Raises:
            ValueError: If ``size`` is smaller than 1, which would place the
                goal at a negative coordinate.
        """
        if size < 1:
            raise ValueError(f"size must be at least 1, got {size}")
        self.size = size
        self.agent_position = [0, 0]  # Starting position
        self.goal_position = [size - 1, size - 1]  # Goal position

    def reset(self):
        """Move the agent back to ``[0, 0]`` and return that state.

        Returns:
            A fresh ``[row, col]`` list; mutating it does not affect the
            environment.
        """
        self.agent_position = [0, 0]
        # Hand out a copy so callers cannot move the agent by editing the state.
        return list(self.agent_position)

    def step(self, action):
        """Apply one action and report the outcome.

        Args:
            action: One of ``'up'``, ``'down'``, ``'left'``, ``'right'``.
                Moves that would leave the grid are clamped to the edge.
                Any other value leaves the agent where it is but still
                yields the regular step reward.

        Returns:
            A tuple ``(state, reward, done)`` where ``state`` is a copy of the
            new ``[row, col]`` position, ``reward`` is ``1`` on reaching the
            goal and ``-1`` otherwise, and ``done`` is ``True`` only when the
            agent is on the goal cell.
        """
        row, col = self.agent_position
        last = self.size - 1

        if action == 'up':
            row = max(0, row - 1)
        elif action == 'down':
            row = min(last, row + 1)
        elif action == 'left':
            col = max(0, col - 1)
        elif action == 'right':
            col = min(last, col + 1)

        self.agent_position = [row, col]

        done = self.agent_position == self.goal_position
        reward = 1 if done else -1

        # Return a copy: the internal list must not be aliased by the caller.
        return list(self.agent_position), reward, done

    def render(self):
        """Print the grid to stdout, followed by a blank line.

        The agent is drawn as ``A``, the goal as ``G`` and every other cell as
        ``.``; the agent takes precedence when it stands on the goal.
        """
        for i in range(self.size):
            for j in range(self.size):
                cell = [i, j]
                if cell == self.agent_position:
                    symbol = "A"
                elif cell == self.goal_position:
                    symbol = "G"
                else:
                    symbol = "."
                print(symbol, end=" ")
            print()
        print()


**Agent**

In [10]:
import random

class Agent:
    """Policy that picks a random action from a fixed collection.

    Args:
        actions: Non-empty sequence of actions to choose from. The sequence
            is stored as given, not copied.
        seed: Optional seed. When provided, the agent draws from its own
            private generator, so its action sequence is reproducible and
            independent of other users of the ``random`` module. When
            omitted, the agent uses the module-level generator exactly as
            before, so a caller's ``random.seed(...)`` still controls it.
    """

    def __init__(self, actions, seed=None):
        self.actions = actions
        # Store None rather than the `random` module itself so that unseeded
        # agents remain picklable and deep-copyable.
        self._rng = None if seed is None else random.Random(seed)

    def select_action(self):
        """Return one element of ``self.actions`` chosen at random.

        Raises:
            IndexError: If ``self.actions`` is empty (raised by
                ``random.choice``).
        """
        chooser = random if self._rng is None else self._rng
        return chooser.choice(self.actions)

In [12]:
def main():
    """Run one episode of a random agent on a 3x3 grid, printing as it goes.

    The initial grid is rendered under an "Initial State:" heading. After
    every step the grid is rendered again, followed by a line naming the
    action taken and the reward received. The loop ends once the environment
    reports the goal has been reached.
    """
    env = GridWorld(size=3)
    agent = Agent(actions=['up', 'down', 'left', 'right'])

    env.reset()
    print("Initial State:")
    env.render()

    while True:
        action = agent.select_action()
        # The returned state is not needed here; render() reads it from env.
        _, reward, done = env.step(action)
        env.render()
        print(f"Action: {action}, Reward: {reward}")

        if done:
            print("Goal Reached!")
            break

# Run the demo episode only when this code is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()


Initial State:
A . . 
. . . 
. . G 

A . . 
. . . 
. . G 

Action: left, Reward: -1
A . . 
. . . 
. . G 

Action: up, Reward: -1
A . . 
. . . 
. . G 

Action: up, Reward: -1
A . . 
. . . 
. . G 

Action: up, Reward: -1
. . . 
A . . 
. . G 

Action: down, Reward: -1
A . . 
. . . 
. . G 

Action: up, Reward: -1
A . . 
. . . 
. . G 

Action: up, Reward: -1
. . . 
A . . 
. . G 

Action: down, Reward: -1
. . . 
A . . 
. . G 

Action: left, Reward: -1
A . . 
. . . 
. . G 

Action: up, Reward: -1
A . . 
. . . 
. . G 

Action: up, Reward: -1
. . . 
A . . 
. . G 

Action: down, Reward: -1
A . . 
. . . 
. . G 

Action: up, Reward: -1
A . . 
. . . 
. . G 

Action: left, Reward: -1
. A . 
. . . 
. . G 

Action: right, Reward: -1
A . . 
. . . 
. . G 

Action: left, Reward: -1
. A . 
. . . 
. . G 

Action: right, Reward: -1
A . . 
. . . 
. . G 

Action: left, Reward: -1
A . . 
. . . 
. . G 

Action: up, Reward: -1
. A . 
. . . 
. . G 

Action: right, Reward: -1
. A . 
. . . 
. . G 

Action: up, Rewa