In [1]:
import random

class GridWorld:
    """Square grid environment in which an agent walks towards a fixed goal.

    Positions are ``[row, column]`` lists. The agent starts in the top-left
    cell ``[0, 0]`` and the goal is the bottom-right cell
    ``[size - 1, size - 1]``.
    """

    def __init__(self, size=3):
        """Create a ``size`` x ``size`` grid with the agent in the top-left cell."""
        far_corner = size - 1
        self.size = size
        self.agent_position = [0, 0]
        self.goal_position = [far_corner, far_corner]

    def reset(self):
        """Put the agent back on ``[0, 0]`` and return that position."""
        start = [0, 0]
        self.agent_position = start
        return start

    def step(self, action):
        """Move the agent one cell and report the outcome.

        ``action`` is one of ``'up'``, ``'down'``, ``'left'`` or ``'right'``;
        any other value leaves the agent where it is. Moves that would leave
        the grid are clamped to the border.

        Returns a ``(position, reward, done)`` tuple: the reward is ``1`` and
        ``done`` is ``True`` when the agent stands on the goal, otherwise the
        reward is ``-1`` and ``done`` is ``False``.
        """
        row, col = self.agent_position
        last = self.size - 1

        # Vertical actions only touch the row, horizontal ones only the column.
        if action in ('up', 'down'):
            row = max(0, row - 1) if action == 'up' else min(last, row + 1)
        elif action in ('left', 'right'):
            col = max(0, col - 1) if action == 'left' else min(last, col + 1)

        self.agent_position = [row, col]

        reached = self.agent_position == self.goal_position
        return self.agent_position, (1 if reached else -1), reached

    def _symbol_at(self, row, col):
        """Return the character drawn for one cell; the agent hides the goal."""
        cell = [row, col]
        if cell == self.agent_position:
            return "A"
        if cell == self.goal_position:
            return "G"
        return "."

    def render(self):
        """Print the grid row by row, followed by one blank line."""
        for row in range(self.size):
            symbols = [self._symbol_at(row, col) for col in range(self.size)]
            # Every cell is followed by a space, including the last in a row.
            print(" ".join(symbols), end=" \n")
        print()

class GreedyPolicy:
    """Policy that heads straight for the goal, closing the row gap first."""

    def __init__(self, actions):
        """Remember the available actions, used only for the random fallback."""
        self.actions = actions

    def select_action(self, state, goal):
        """Return the action that moves ``state`` one cell closer to ``goal``.

        Both arguments are ``[row, column]`` pairs. The row difference is
        resolved before the column difference. When the state already equals
        the goal, a random action from ``self.actions`` is returned.
        """
        row, col = state
        goal_row, goal_col = goal

        # Row first: 'down' increases the row index, 'up' decreases it.
        if row < goal_row:
            return 'down'
        if row > goal_row:
            return 'up'

        # Rows match, so close the column gap.
        if col > goal_col:
            return 'left'
        if col < goal_col:
            return 'right'

        # Already on the goal cell: no direction is better than another.
        return random.choice(self.actions)

def simulate_policy(policy, env, max_steps=None):
    """Run one episode of ``policy`` in ``env``, printing every step.

    Args:
        policy: Object with ``select_action(state, goal)`` returning an action.
        env: Object with ``reset()``, ``step(action)`` returning
            ``(state, reward, done)``, ``render()`` and a ``goal_position``
            attribute.
        max_steps: Optional cap on the number of steps. ``None`` (the
            default) runs until the environment reports ``done``. A cap
            keeps the loop finite for a policy that never reaches the goal;
            a value of zero or less performs no steps.

    Returns:
        A ``(total_reward, step_count)`` tuple for the episode.
    """
    state = env.reset()
    total_reward = 0
    step_count = 0

    done = False
    # Stop on the goal, or once the optional step budget is used up.
    while not done and (max_steps is None or step_count < max_steps):
        action = policy.select_action(state, env.goal_position)
        print(f"Step {step_count + 1}:")
        print(f"Action: {action}")
        state, reward, done = env.step(action)
        env.render()
        print(f"Reward: {reward}")
        total_reward += reward
        step_count += 1

    # Only announce success when the episode really ended on the goal,
    # not when it was cut short by max_steps.
    if done:
        print('Goal Reached')

    return total_reward, step_count

def main():
    """Run the greedy policy once on a default grid and print a summary."""
    world = GridWorld()
    policy = GreedyPolicy(['up', 'down', 'left', 'right'])

    # simulate_policy prints the per-step trace; only the totals remain.
    total, steps = simulate_policy(policy, world)

    print('Greedy Policy')
    print('Total Reward:', total)
    print('Total Step Count:', steps)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


Step 1:
Action: down
. . . 
A . . 
. . G 

Reward: -1
Step 2:
Action: down
. . . 
. . . 
A . G 

Reward: -1
Step 3:
Action: right
. . . 
. . . 
. A G 

Reward: -1
Step 4:
Action: right
. . . 
. . . 
. . A 

Reward: 1
Goal Reached
Greedy Policy
Total Reward: -2
Total Step Count: 4
