<a href="https://colab.research.google.com/github/mahi5062/192225062__csa07/blob/main/cheese_pile_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

# Define the grid-world dimensions and parameters
rows, cols = 3, 4
discount_factor = 0.9
threshold = 0.0001

# Define rewards for each cell in the grid
# A high reward in one cell signifies the "pile of cheese"
rewards = np.zeros((rows, cols))
rewards[2, 3] = 10  # Cheese pile with a high reward
rewards[1, 1] = -1  # Obstacle with a penalty

# Define actions: Up, Down, Left, Right
actions = {
    'up': (-1, 0),
    'down': (1, 0),
    'left': (0, -1),
    'right': (0, 1)
}

# Initialize state values
values = np.zeros((rows, cols))

# Check if a move is within bounds of the grid
def is_valid_position(x, y):
    return 0 <= x < rows and 0 <= y < cols

# Bellman update rule: Iteratively improve state values using the Bellman equation
def value_iteration():
    global values
    iteration = 0
    while True:
        delta = 0
        new_values = np.copy(values)

        for x in range(rows):
            for y in range(cols):
                # Skip terminal or obstacle cells
                if (x, y) == (2, 3) or (x, y) == (1, 1):
                    continue

                # Compute the value for each action from the current state (x, y)
                action_values = []
                for action, (dx, dy) in actions.items():
                    nx, ny = x + dx, y + dy
                    if is_valid_position(nx, ny):
                        action_value = rewards[x, y] + discount_factor * values[nx, ny]
                    else:
                        # Staying in the same place if moving out of bounds
                        action_value = rewards[x, y] + discount_factor * values[x, y]
                    action_values.append(action_value)

                # Update the state value with the maximum value of actions
                new_values[x, y] = max(action_values)

                # Calculate the maximum change (delta) for convergence check
                delta = max(delta, abs(new_values[x, y] - values[x, y]))

        values = new_values
        iteration += 1

        # Stop if values have converged
        if delta < threshold:
            break

    print(f"Value Iteration converged after {iteration} iterations.")
    return values

# Run the value iteration
optimal_values = value_iteration()

# Display the optimal state values
print("Optimal State Values:")
print(optimal_values)


Value Iteration converged after 1 iterations.
Optimal State Values:
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
