# Implementing Gridworld Environment

This is a ChatGPT-generated activity.

Description:
- Create a simple gridworld environment using Python where an agent moves in a 2D grid.
- Define states, actions, transition probabilities, and rewards.
- Test different policies manually to understand how value functions work.

In [16]:
import numpy as np
import random

In [17]:
import random

class GridworldEnv:
    """A small stochastic gridworld with one goal cell and obstacle cells.

    States are ``(row, col)`` tuples. The agent starts at ``START_STATE`` and
    picks one of four actions per step. With probability ``slip_prob`` the
    chosen action is replaced by one of the other three, picked uniformly.

    Rewards per step:
        * ``+10`` and the episode ends when the agent reaches the goal.
        * ``-5`` and the episode ends when the executed move runs into an
          obstacle (the agent stays on its current cell).
        * ``-1`` otherwise, including moves blocked by the grid boundary.

    Args:
        grid_size: ``(rows, cols)`` of the grid.
        goal_state: ``(row, col)`` of the goal cell.
        obstacles: Iterable of ``(row, col)`` obstacle cells. It is copied,
            so instances never share (or mutate) the caller's container.
        slip_prob: Probability in ``[0, 1]`` that the executed move differs
            from the requested one.

    Raises:
        ValueError: If ``slip_prob`` is outside ``[0, 1]``.
    """

    START_STATE = (0, 0)

    def __init__(self, grid_size=(4, 4), goal_state=(0, 3),
                 obstacles=((1, 1), (2, 2)), slip_prob=0.2):
        if not 0.0 <= slip_prob <= 1.0:
            raise ValueError("slip_prob must be between 0 and 1")

        self.grid_size = grid_size
        # Normalise to tuples so comparisons against the tuple-valued state
        # work even if the caller passed lists such as [0, 3].
        self.goal_state = tuple(goal_state)
        self.obstacles = [tuple(cell) for cell in obstacles]
        self.slip_prob = slip_prob
        self.actions = ['Up', 'Down', 'Left', 'Right']
        self.action_moves = {
            'Up': (-1, 0),
            'Down': (1, 0),
            'Left': (0, -1),
            'Right': (0, 1)
        }
        # Give the agent a position right away so step()/render() are
        # usable even if the caller has not called reset() yet.
        self.state = self.START_STATE

    def reset(self):
        """Put the agent back on the start cell and return that state."""
        self.state = self.START_STATE
        return self.state

    def _in_bounds(self, cell):
        """Return True if ``cell`` lies inside the grid."""
        return (0 <= cell[0] < self.grid_size[0]
                and 0 <= cell[1] < self.grid_size[1])

    def step(self, action):
        """Advance the environment by one action.

        Args:
            action: One of the strings in ``self.actions``.

        Returns:
            A ``(next_state, reward, done)`` tuple.

        Raises:
            ValueError: If ``action`` is not a known action.
        """
        # Explicit raise instead of ``assert``: asserts vanish under ``-O``.
        if action not in self.actions:
            raise ValueError("Invalid action!")

        # Decide which move is actually executed. A slip replaces the
        # requested move entirely; it never falls back to it.
        move = self.action_moves[action]
        if random.random() < self.slip_prob:
            alternatives = [m for m in self.action_moves.values() if m != move]
            move = random.choice(alternatives)

        candidate = (self.state[0] + move[0], self.state[1] + move[1])
        hit_obstacle = self._in_bounds(candidate) and candidate in self.obstacles

        # Walls and obstacles both leave the agent where it was.
        if self._in_bounds(candidate) and not hit_obstacle:
            next_state = candidate
        else:
            next_state = self.state

        # The collision is recorded before the position is reverted, so the
        # obstacle penalty can actually be awarded.
        if hit_obstacle:
            reward, done = -5, True
        elif next_state == self.goal_state:
            reward, done = 10, True
        else:
            reward, done = -1, False

        self.state = next_state
        return next_state, reward, done

    def render(self):
        """Print the grid: ``G`` goal, ``X`` obstacle, ``A`` agent, ``.`` free."""
        rows, cols = self.grid_size
        grid = [['.' for _ in range(cols)] for _ in range(rows)]

        # Mark goal and obstacles
        grid[self.goal_state[0]][self.goal_state[1]] = 'G'
        for obstacle in self.obstacles:
            grid[obstacle[0]][obstacle[1]] = 'X'

        # The agent is drawn last so it is always visible.
        agent_row, agent_col = self.state
        grid[agent_row][agent_col] = 'A'

        for row in grid:
            print(' '.join(row))
        print()

In [20]:
# Smoke test: build the environment with its default arguments, put the
# agent on the start cell, and print the grid.
env = GridworldEnv()

state = env.reset()  # reset() returns the start state, (0, 0)
env.render()

A . . G
. X . .
. . X .
. . . .

