# RL Lab Assignment - 1

### CS22B1093 Rohan G

---------------------------------

### Create a custom environment for the Cliff Walker problem by defining the appropriate states, actions, rewards, and termination condition.

In [5]:
# importing necessary libraries
import numpy as np
import gymnasium as gym
from gymnasium import Env, spaces

In [6]:
class CustomCliffWalkerEnv(Env):
    """Grid-world Cliff Walker environment.

    The agent starts in the bottom-left cell and must reach the goal in the
    bottom-right cell. Every bottom-row cell between the two is a cliff.

    Layout codes stored in ``self.cliff``: 0 = ordinary cell, -1 = cliff,
    1 = goal. Rewards: -1 for an ordinary cell, -100 for a cliff cell and
    +1 for the goal.

    Actions: 0 = Left, 1 = Down, 2 = Right, anything else (3) = Up. Moves
    that would leave the grid keep the agent on the boundary cell.

    NOTE: ``step`` returns the 4-tuple ``(observation, reward, done, info)``
    and ``reset`` returns only the observation. Stepping onto a cliff cell
    does NOT end the episode or move the agent back to the start; the
    caller is expected to call ``reset()`` when it sees the -100 reward.
    """

    def __init__(self, row_size=4, column_size=12):
        """Build the grid layout and reward table.

        Args:
            row_size: Number of rows in the grid (at least 1).
            column_size: Number of columns in the grid (at least 2, so the
                start and goal cells are distinct).

        Raises:
            ValueError: If the requested grid is too small.
        """
        super().__init__()

        if row_size < 1 or column_size < 2:
            raise ValueError(
                "row_size must be >= 1 and column_size must be >= 2, "
                f"got row_size={row_size}, column_size={column_size}"
            )

        self.row_size = row_size  # Number of rows in the grid
        self.column_size = column_size  # Number of columns in the grid

        # 4 possible actions: Left, Down, Right, Up
        self.action_space = spaces.Discrete(4)
        # One discrete observation per grid cell (row-major index)
        self.observation_space = spaces.Discrete(row_size * column_size)

        # Everything interesting lives on the bottom row; derive the
        # positions from the grid size instead of hard-coding 4x12.
        bottom_row = row_size - 1
        last_column = column_size - 1

        self.start = (bottom_row, 0)  # Starting position of the agent
        self.goal = (bottom_row, last_column)  # Terminal cell

        self.cliff = np.zeros((row_size, column_size), dtype=int)
        self.cliff[bottom_row, 1:last_column] = -1  # Cliff cells
        self.cliff[self.goal] = 1  # Goal cell

        self.rewards = np.zeros_like(self.cliff, dtype=float)
        self.rewards[self.cliff == 0] = -1  # Ordinary step cost
        self.rewards[self.cliff == -1] = -100  # Falling off the cliff
        self.rewards[self.cliff == 1] = 1  # Reaching the goal

        # Initialise the agent position so render()/step() are usable even
        # if the caller has not called reset() yet.
        self.state = self.start

    def step(self, action):
        """Move the agent one cell and report the outcome.

        Args:
            action: 0 = Left, 1 = Down, 2 = Right, any other value = Up.

        Returns:
            A tuple ``(state_index, reward, done, info)`` where
            ``state_index`` is the row-major index of the new cell,
            ``reward`` is the reward of the cell entered, ``done`` is True
            only when the goal cell was entered, and ``info`` is an empty
            dict.
        """
        row, col = self.state

        if action == 0:  # Left
            col = max(0, col - 1)
        elif action == 1:  # Down
            row = min(self.row_size - 1, row + 1)
        elif action == 2:  # Right
            col = min(self.column_size - 1, col + 1)
        else:  # Up
            row = max(0, row - 1)

        self.state = (row, col)

        # Convert NumPy scalars to plain Python values for the caller.
        reward = float(self.rewards[row, col])
        done = bool(self.cliff[row, col] == 1)

        return self._get_state_index(), reward, done, {}

    def reset(self):
        """Put the agent back on the start cell and return its state index."""
        self.state = self.start
        return self._get_state_index()

    def render(self):
        """Print the grid: '.' empty, '-' cliff, 'G' goal, 'A' agent."""
        grid = np.full(self.cliff.shape, ".", dtype="<U1")
        grid[self.cliff == 1] = "G"  # Goal
        grid[self.cliff == -1] = "-"  # Cliff
        row, col = self.state
        grid[row, col] = "A"  # Agent (drawn last so it is always visible)
        print("\n".join(" ".join(line) for line in grid))

    def _get_state_index(self):
        """Convert the 2D (row, column) state to a row-major 1D index."""
        row, col = self.state
        return row * self.column_size + col

# Random-policy rollout: sample actions until the agent reaches the goal,
# sending it back to the start whenever it steps onto the cliff.
env = CustomCliffWalkerEnv(row_size=4, column_size=12) # Creating an instance of the environment
state = env.reset() # Resetting the environment
done = False # Flag to check if the goal is reached

print("..........Initializing the Environment................")
env.render()

# Named `num_steps` rather than `iter` so the builtin iter() is not shadowed
# for the rest of the session.
num_steps = 0

while not done:
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    print(f"\n\tAction: {action}")
    env.render()
    print(f"\tReward: {reward}")
    if reward == -100:
        # The environment does not reset itself on a fall, so do it here
        # (after rendering, so the fall itself is visible in the output).
        state = env.reset()
    else:
        state = next_state
    print("-----------------------")
    num_steps += 1

print(f"Total number of iterations done by agent to reach goal is : {num_steps}")

..........Initializing the Environment................
. . . . . . . . . . . .
. . . . . . . . . . . .
. . . . . . . . . . . .
A - - - - - - - - - - G

	Action: 3
. . . . . . . . . . . .
. . . . . . . . . . . .
A . . . . . . . . . . .
. - - - - - - - - - - G
	Reward: -1.0
-----------------------

	Action: 3
. . . . . . . . . . . .
A . . . . . . . . . . .
. . . . . . . . . . . .
. - - - - - - - - - - G
	Reward: -1.0
-----------------------

	Action: 2
. . . . . . . . . . . .
. A . . . . . . . . . .
. . . . . . . . . . . .
. - - - - - - - - - - G
	Reward: -1.0
-----------------------

	Action: 1
. . . . . . . . . . . .
. . . . . . . . . . . .
. A . . . . . . . . . .
. - - - - - - - - - - G
	Reward: -1.0
-----------------------

	Action: 3
. . . . . . . . . . . .
. A . . . . . . . . . .
. . . . . . . . . . . .
. - - - - - - - - - - G
	Reward: -1.0
-----------------------

	Action: 2
. . . . . . . . . . . .
. . A . . . . . . . . .
. . . . . . . . . . . .
. - - - - - - - - - - G
	Reward: -1

________