In [1]:
import random

class GridWorld:
    """Square grid environment in which an agent walks toward a fixed goal.

    Positions are ``[row, column]`` lists. The agent starts in the top-left
    cell ``[0, 0]`` and the goal is the bottom-right cell
    ``[size - 1, size - 1]``.
    """

    def __init__(self, size=3):
        """Create a ``size`` x ``size`` grid with the agent at ``[0, 0]``."""
        self.size = size
        self.agent_position = [0, 0]
        self.goal_position = [size - 1, size - 1]

    def reset(self):
        """Put the agent back at ``[0, 0]`` and return that position."""
        start = [0, 0]
        self.agent_position = start
        return start

    def step(self, action):
        """Apply ``action`` and return ``(position, reward, done)``.

        ``'up'``/``'down'`` change the row and ``'left'``/``'right'`` change
        the column, each by one cell and clamped at the grid edge. Any other
        action leaves the agent where it is. The reward is ``1`` (with
        ``done=True``) when the agent ends the step on the goal, otherwise
        ``-1`` (with ``done=False``).
        """
        row, col = self.agent_position
        last = self.size - 1

        if action in ('up', 'down'):
            row = max(0, row - 1) if action == 'up' else min(last, row + 1)
        elif action in ('left', 'right'):
            col = max(0, col - 1) if action == 'left' else min(last, col + 1)

        self.agent_position = [row, col]

        if self.agent_position == self.goal_position:
            return self.agent_position, 1, True
        return self.agent_position, -1, False

    def render(self):
        """Print the grid: ``A`` for the agent, ``G`` for the goal, ``.`` elsewhere.

        Every cell is followed by a space, every row by a newline, and the
        whole grid by one blank line.
        """
        for row in range(self.size):
            line = ""
            for col in range(self.size):
                cell = [row, col]
                # The agent takes precedence when it stands on the goal cell.
                if cell == self.agent_position:
                    symbol = "A"
                elif cell == self.goal_position:
                    symbol = "G"
                else:
                    symbol = "."
                line += symbol + " "
            print(line)
        print()

class RandomPolicy:
    """Policy that ignores the state and picks one of its actions at random."""

    def __init__(self, actions):
        """Remember the sequence of actions to sample from."""
        self.actions = actions

    def select_action(self, state):
        """Return a randomly chosen element of ``self.actions``.

        ``state`` is accepted so the policy can be called like the others,
        but it is not used.
        """
        chosen = random.choice(self.actions)
        return chosen

class DeterministicPolicy:
    """Fixed policy: go right until the last column, then go down.

    States are ``[row, column]`` pairs. The last column is derived from the
    grid ``size`` given at construction instead of being hard-coded, so the
    policy also heads for the bottom-right corner on grids other than 3x3.
    """

    def __init__(self, actions, size=3):
        """Store the action list and the index of the last grid column.

        Args:
            actions: Available actions; kept for interface parity with the
                other policies and not consulted when choosing a move.
            size: Side length of the grid the policy is used on. The default
                of 3 keeps the previous behaviour (turn down at column 2).
        """
        self.actions = actions
        self.last_column = size - 1

    def select_action(self, state):
        """Return ``'right'`` while left of the last column, else ``'down'``."""
        _, col = state
        if col < self.last_column:
            return 'right'
        return 'down'

class GreedyPolicy:
    """Policy that closes the row gap to the goal first, then the column gap."""

    def __init__(self, actions):
        """Keep the action list used for the random fallback."""
        self.actions = actions

    def select_action(self, state, goal):
        """Return the move that reduces the distance from ``state`` to ``goal``.

        Both arguments are ``(row, column)`` pairs. Row differences are
        resolved before column differences. When the two positions are equal
        a random element of ``self.actions`` is returned.
        """
        row, col = state
        goal_row, goal_col = goal

        if row < goal_row:
            return 'down'
        if row > goal_row:
            return 'up'
        if col > goal_col:
            return 'left'
        if col < goal_col:
            return 'right'

        # Already on the goal cell: no direction helps, so pick any action.
        return random.choice(self.actions)

def simulate_policy(policy, env, goal_position=None, max_steps=None):
    """Run one episode of ``policy`` in ``env``, printing every step.

    The environment is reset first. On each step the chosen action, the
    rendered grid and the reward are printed; ``'Goal Reached'`` is printed
    once the environment reports the episode as done.

    Args:
        policy: Object with ``select_action``. A ``GreedyPolicy`` is called
            with ``(state, goal)``; any other policy with ``(state)`` only.
        env: Environment providing ``reset()``, ``step(action)``,
            ``render()`` and a ``goal_position`` attribute.
        goal_position: Target handed to a ``GreedyPolicy``. When ``None``
            (the default) the environment's own ``goal_position`` is used.
        max_steps: Optional cap on the number of steps. ``None`` (the
            default) means run until the environment reports ``done``, which
            never returns if the policy cannot reach the goal.

    Returns:
        A ``(total_reward, step_count)`` tuple for the episode.
    """
    state = env.reset()
    total_reward = 0
    step_count = 0
    done = False

    while not done and (max_steps is None or step_count < max_steps):
        if isinstance(policy, GreedyPolicy):
            # Read the environment's goal on every step, as before, unless
            # the caller supplied an explicit target.
            target = env.goal_position if goal_position is None else goal_position
            action = policy.select_action(state, target)
        else:
            action = policy.select_action(state)

        print(f"Step {step_count + 1}:")
        print(f"Action: {action}")
        state, reward, done = env.step(action)
        env.render()
        print(f"Reward: {reward}")

        total_reward += reward
        step_count += 1

    # Only announce success when the environment ended the episode; hitting
    # max_steps leaves ``done`` False.
    if done:
        print('Goal Reached')

    return total_reward, step_count

def main():
    """Run the random, deterministic and greedy policies on one default grid.

    Each policy gets its own episode on the same ``GridWorld`` instance, and
    its total reward and step count are printed afterwards.
    """
    env = GridWorld()
    actions = ['up', 'down', 'left', 'right']

    demos = (
        ("Random Policy", RandomPolicy),
        ("Deterministic Policy", DeterministicPolicy),
        ("Greedy Policy", GreedyPolicy),
    )

    for title, policy_cls in demos:
        policy = policy_cls(actions)
        print(title)
        total_reward, total_steps = simulate_policy(policy, env)
        print(f"Total Reward: {total_reward}")
        print(f"Total Steps: {total_steps}\n")

# Run the policy demonstrations only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Random Policy
Step 1:
Action: right
. A . 
. . . 
. . G 

Reward: -1
Step 2:
Action: left
A . . 
. . . 
. . G 

Reward: -1
Step 3:
Action: left
A . . 
. . . 
. . G 

Reward: -1
Step 4:
Action: left
A . . 
. . . 
. . G 

Reward: -1
Step 5:
Action: up
A . . 
. . . 
. . G 

Reward: -1
Step 6:
Action: right
. A . 
. . . 
. . G 

Reward: -1
Step 7:
Action: right
. . A 
. . . 
. . G 

Reward: -1
Step 8:
Action: left
. A . 
. . . 
. . G 

Reward: -1
Step 9:
Action: left
A . . 
. . . 
. . G 

Reward: -1
Step 10:
Action: right
. A . 
. . . 
. . G 

Reward: -1
Step 11:
Action: up
. A . 
. . . 
. . G 

Reward: -1
Step 12:
Action: down
. . . 
. A . 
. . G 

Reward: -1
Step 13:
Action: down
. . . 
. . . 
. A G 

Reward: -1
Step 14:
Action: up
. . . 
. A . 
. . G 

Reward: -1
Step 15:
Action: left
. . . 
A . . 
. . G 

Reward: -1
Step 16:
Action: left
. . . 
A . . 
. . G 

Reward: -1
Step 17:
Action: right
. . . 
. A . 
. . G 

Reward: -1
Step 18:
Action: down
. . . 
. . . 
. A G 

Reward: -1
Step 1