# Create a custom Cliff Walking environment by defining its states, actions, rewards, and termination condition.


**CS22B2043**
**Bandaru Ritesh Kumar**

In [3]:
import numpy as np
import gym
from gym import Env, spaces

class CustomCliffWalking(Env):
    """Grid-world cliff-walking environment with discrete states and actions.

    The grid has ``grid_size1`` rows and ``grid_size2`` columns. The agent
    starts in the bottom-left cell, the goal is the bottom-right cell, and
    every bottom-row cell strictly between them is cliff. For the default
    4x6 grid this is start ``(3, 0)``, cliff ``(3, 1)..(3, 4)``, goal
    ``(3, 5)``.

    Observations are flat cell indices ``row * grid_size2 + col``.
    Actions: 0 = left, 1 = down, 2 = right, 3 = up. Moves that would leave
    the grid keep the agent in place along that axis.

    An episode terminates when the agent enters a cliff cell or the goal.
    """

    # Cell codes stored in ``self.cliff``.
    SAFE = 0
    CLIFF = 1
    GOAL = 2

    # Reward received on entering a cell of the given kind.
    # NOTE(review): a positive reward on every safe step rewards wandering
    # rather than reaching the goal quickly; values are kept as-is so that
    # existing behaviour does not change.
    SAFE_REWARD = 10.0
    CLIFF_REWARD = -10.0
    GOAL_REWARD = 100.0

    def __init__(self, grid_size1=4, grid_size2=6):
        """Build the grid layout and reward table.

        Args:
            grid_size1: Number of rows (at least 1).
            grid_size2: Number of columns (at least 2, so that the start and
                goal cells are distinct).

        Raises:
            ValueError: If the grid is too small to hold a start and a goal.
        """
        super().__init__()

        if grid_size1 < 1 or grid_size2 < 2:
            raise ValueError(
                "grid must have at least 1 row and 2 columns, got "
                f"{grid_size1}x{grid_size2}"
            )

        # Define grid size
        self.grid_size1 = grid_size1
        self.grid_size2 = grid_size2

        # Action space: 0 - left; 1 - down; 2 - right; 3 - up
        self.action_space = spaces.Discrete(4)

        # Observation space where each state corresponds to a grid cell
        self.observation_space = spaces.Discrete(grid_size1 * grid_size2)

        # Layout is derived from the grid size instead of being hard-coded,
        # so non-default sizes neither crash nor misplace the cliff/goal.
        bottom = grid_size1 - 1
        last_col = grid_size2 - 1
        self.start_state = (bottom, 0)
        self.goal_state = (bottom, last_col)

        # Cliff layout: 0 = safe, 1 = cliff, 2 = goal
        self.cliff = np.zeros((grid_size1, grid_size2), dtype=int)
        self.cliff[bottom, 1:last_col] = self.CLIFF
        self.cliff[self.goal_state] = self.GOAL

        # Reward table, indexed by the cell the agent moves into
        self.rewards = np.zeros_like(self.cliff, dtype=float)
        self.rewards[self.cliff == self.SAFE] = self.SAFE_REWARD
        self.rewards[self.cliff == self.CLIFF] = self.CLIFF_REWARD
        self.rewards[self.cliff == self.GOAL] = self.GOAL_REWARD

        # Starting state
        self.state = self.start_state

    def step(self, action):
        """Apply ``action`` and advance the environment by one step.

        Args:
            action: 0 = left, 1 = down, 2 = right, 3 = up. Any other value
                leaves the agent where it is.

        Returns:
            Tuple ``(observation, reward, done, info)``: the flat index of
            the new cell, the reward of that cell as a ``float``, a ``bool``
            that is True when the new cell is cliff or goal, and an empty
            info dict.
        """
        x, y = self.state

        # Move agent, clamping at the grid borders
        if action == 0:  # Left
            y = max(0, y - 1)
        elif action == 1:  # Down
            x = min(self.grid_size1 - 1, x + 1)
        elif action == 2:  # Right
            y = min(self.grid_size2 - 1, y + 1)
        elif action == 3:  # Up
            x = max(0, x - 1)

        # Update state
        self.state = (x, y)

        # Plain Python scalars rather than NumPy scalars
        reward = float(self.rewards[x, y])
        cell = self.cliff[x, y]
        done = bool(cell == self.GOAL or cell == self.CLIFF)

        return self._get_state_index(), reward, done, {}

    def reset(self):
        """Move the agent back to the start cell and return its flat index."""
        self.state = self.start_state
        return self._get_state_index()

    def render(self):
        """Print the grid: '.' safe, 'H' cliff, 'G' goal, 'A' agent."""
        grid = np.full(self.cliff.shape, ".", dtype=str)
        grid[self.cliff == self.CLIFF] = "H"
        grid[self.cliff == self.GOAL] = "G"
        x, y = self.state
        grid[x, y] = "A"  # Agent is drawn on top of whatever cell it occupies
        print("\n".join(" ".join(row) for row in grid))

    def _get_state_index(self):
        """Return the row-major flat index of the current ``(row, col)`` state."""
        return self.state[0] * self.grid_size2 + self.state[1]


# Smoke test: run one episode with uniformly random actions, rendering the
# grid and printing the reward after every step, until the episode ends.
env = CustomCliffWalking(grid_size1=4, grid_size2=6)
state = env.reset()
done = False

print("Initial Environment:")
env.render()

# `done` starts out False, so at least one step is always taken.
while True:
    action = env.action_space.sample()  # Random policy
    next_state, reward, done, info = env.step(action)

    print(f"\nAction: {action}")
    env.render()
    print(f"Reward: {reward}")

    if done:
        break

Initial Environment:
. . . . . .
. . . . . .
. . . . . .
A H H H H G

Action: 1
. . . . . .
. . . . . .
. . . . . .
A H H H H G
Reward: 10.0

Action: 3
. . . . . .
. . . . . .
A . . . . .
. H H H H G
Reward: 10.0

Action: 0
. . . . . .
. . . . . .
A . . . . .
. H H H H G
Reward: 10.0

Action: 3
. . . . . .
A . . . . .
. . . . . .
. H H H H G
Reward: 10.0

Action: 2
. . . . . .
. A . . . .
. . . . . .
. H H H H G
Reward: 10.0

Action: 0
. . . . . .
A . . . . .
. . . . . .
. H H H H G
Reward: 10.0

Action: 0
. . . . . .
A . . . . .
. . . . . .
. H H H H G
Reward: 10.0

Action: 0
. . . . . .
A . . . . .
. . . . . .
. H H H H G
Reward: 10.0

Action: 1
. . . . . .
. . . . . .
A . . . . .
. H H H H G
Reward: 10.0

Action: 0
. . . . . .
. . . . . .
A . . . . .
. H H H H G
Reward: 10.0

Action: 3
. . . . . .
A . . . . .
. . . . . .
. H H H H G
Reward: 10.0

Action: 1
. . . . . .
. . . . . .
A . . . . .
. H H H H G
Reward: 10.0

Action: 2
. . . . . .
. . . . . .
. A . . . .
. H H H H G
Reward: 