In [2]:
import random

class GridWorld:
    """A square grid the agent crosses from the top-left to the bottom-right cell.

    Positions are ``[row, col]`` lists: ``'up'``/``'down'`` change the row,
    ``'left'``/``'right'`` change the column. Moves that would leave the grid
    are clamped to the border, so the agent simply stays on the edge cell.
    """

    # (row delta, column delta) for every recognised action.
    _MOVES = {
        'up': (-1, 0),
        'down': (1, 0),
        'left': (0, -1),
        'right': (0, 1),
    }

    def __init__(self, size=3):
        """Create a ``size`` x ``size`` grid with the agent at ``[0, 0]``.

        Args:
            size: Number of rows and columns; the goal is ``[size - 1, size - 1]``.

        Raises:
            ValueError: If ``size`` is smaller than 1 (the goal would fall
                outside the grid).
        """
        if size < 1:
            raise ValueError(f"size must be at least 1, got {size!r}")
        self.size = size
        self.agent_position = [0, 0]
        self.goal_position = [size - 1, size - 1]

    def reset(self):
        """Move the agent back to ``[0, 0]`` and return the start position.

        Returns:
            A fresh ``[row, col]`` list; mutating it does not affect the
            environment.
        """
        self.agent_position = [0, 0]
        # Hand out a copy so callers cannot move the agent by editing the state.
        return list(self.agent_position)

    def step(self, action):
        """Apply ``action`` and report the outcome.

        Args:
            action: One of ``'up'``, ``'down'``, ``'left'``, ``'right'``. Any
                other value leaves the agent where it is but still counts as
                a step.

        Returns:
            A ``(position, reward, done)`` tuple. ``position`` is a copy of
            the agent's new ``[row, col]``; ``reward`` is ``1`` when the goal
            is reached and ``-1`` otherwise; ``done`` is ``True`` exactly when
            the agent is on the goal cell.
        """
        d_row, d_col = self._MOVES.get(action, (0, 0))
        row, col = self.agent_position
        last = self.size - 1
        # Clamp to the grid so walking into a wall is a no-op.
        row = min(last, max(0, row + d_row))
        col = min(last, max(0, col + d_col))
        self.agent_position = [row, col]

        done = self.agent_position == self.goal_position
        reward = 1 if done else -1
        return list(self.agent_position), reward, done

    def render(self):
        """Print the grid: ``A`` for the agent, ``G`` for the goal, ``.`` elsewhere.

        Each cell is followed by a space, each row by a newline, and the
        whole grid by one blank line. The agent marker wins when the agent
        stands on the goal.
        """
        for i in range(self.size):
            for j in range(self.size):
                cell = [i, j]
                if cell == self.agent_position:
                    symbol = "A"
                elif cell == self.goal_position:
                    symbol = "G"
                else:
                    symbol = "."
                print(symbol, end=" ")
            print()
        print()

class RandomPolicy:
    """Policy that ignores the state and picks uniformly among its actions."""

    def __init__(self, actions):
        """Remember the sequence of actions to sample from."""
        self.actions = actions

    def select_action(self):
        """Return one element of ``self.actions`` chosen by ``random.choice``."""
        chosen = random.choice(self.actions)
        return chosen

def simulate_policy(policy, env, max_steps=None):
    """Run one episode of ``policy`` in ``env``, printing every step.

    The environment is reset first. On each iteration the policy's
    ``select_action()`` is called, the action is passed to ``env.step()``,
    the grid is rendered and the reward is printed. The loop ends when
    ``env.step()`` reports ``done`` or, if ``max_steps`` is given, once that
    many steps have been taken.

    Args:
        policy: Object exposing ``select_action()``.
        env: Object exposing ``reset()``, ``step(action)`` returning
            ``(state, reward, done)``, and ``render()``.
        max_steps: Optional cap on the number of steps. ``None`` (the
            default) runs until the environment reports ``done``, which may
            never happen for a policy that cannot reach the goal.

    Returns:
        A ``(total_reward, step_count)`` tuple for the episode.
    """
    env.reset()
    total_reward = 0
    step_count = 0

    done = False
    while not done and (max_steps is None or step_count < max_steps):
        action = policy.select_action()
        print(f"Step {step_count + 1}:")
        print(f"Action: {action}")
        # The returned state is not needed: this driver never feeds it to the policy.
        _, reward, done = env.step(action)
        env.render()
        print(f"Reward: {reward}")
        total_reward += reward
        step_count += 1

    if done:
        print("Goal Reached!")

    return total_reward, step_count

def main():
    """Run one random-policy episode on a 3x3 grid and print a summary."""
    env = GridWorld(size=3)
    policy = RandomPolicy(['up', 'down', 'left', 'right'])

    episode_reward, episode_steps = simulate_policy(policy, env)

    # One print call with newline separators emits the same three lines.
    print(
        'Random Policy:',
        f"Total Reward: {episode_reward}",
        f"Steps to Reach Goal: {episode_steps}",
        sep="\n",
    )

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


Step 1:
Action: left
A . . 
. . . 
. . G 

Reward: -1
Step 2:
Action: left
A . . 
. . . 
. . G 

Reward: -1
Step 3:
Action: left
A . . 
. . . 
. . G 

Reward: -1
Step 4:
Action: down
. . . 
A . . 
. . G 

Reward: -1
Step 5:
Action: left
. . . 
A . . 
. . G 

Reward: -1
Step 6:
Action: down
. . . 
. . . 
A . G 

Reward: -1
Step 7:
Action: up
. . . 
A . . 
. . G 

Reward: -1
Step 8:
Action: down
. . . 
. . . 
A . G 

Reward: -1
Step 9:
Action: right
. . . 
. . . 
. A G 

Reward: -1
Step 10:
Action: down
. . . 
. . . 
. A G 

Reward: -1
Step 11:
Action: down
. . . 
. . . 
. A G 

Reward: -1
Step 12:
Action: right
. . . 
. . . 
. . A 

Reward: 1
Goal Reached!
Random Policy:
Total Reward: -10
Steps to Reach Goal: 12
