In [1]:
def reward_1(observation, lockedroom_color, keyroom_color, door_color):
    """Reward the agent for being close to the key room visible in its view.

    Every pixel of ``observation['image']`` whose channels all lie inside the
    RGB range registered for ``keyroom_color`` is treated as part of the key
    room.  The reward falls off linearly with the Euclidean distance (in
    pixels) between the centroid of those pixels and the bottom-centre pixel
    of the image, which this function uses as the agent's position.

    Args:
        observation: Mapping with an ``'image'`` entry.  The first two axes
            are treated as rows and columns; the last axis is compared
            against 3-component RGB bounds.
            NOTE(review): assumes an RGB image with 0-255 channel values --
            confirm against the environment wrapper that produces it.
        lockedroom_color: Unused; kept so the signature stays unchanged.
        keyroom_color: Name of the key room's colour; must be a key of the
            colour table below.
        door_color: Unused; kept so the signature stays unchanged.

    Returns:
        ``0`` when no pixel matches the key-room colour.  Otherwise
        ``1 - distance / diagonal``, where ``diagonal`` is the length of the
        image diagonal, so the reward is 1 when the centroid sits exactly on
        the agent position.

    Raises:
        KeyError: If ``observation`` has no ``'image'`` entry or
            ``keyroom_color`` is not a known colour name.
    """
    import numpy as np

    # Inclusive (lower, upper) RGB bounds for each recognised room colour.
    color_ranges = {
        'red': ((200, 0, 0), (255, 50, 50)),
        'green': ((0, 200, 0), (50, 255, 50)),
        'blue': ((0, 0, 200), (50, 50, 255)),
        'purple': ((128, 0, 128), (177, 50, 177)),
        'yellow': ((200, 200, 0), (255, 255, 50)),
        'grey': ((130, 130, 130), (180, 180, 180))
    }

    image = np.asarray(observation['image'])
    lower, upper = color_ranges[keyroom_color]

    # A 2-D (single-channel) image gets a trailing axis so that each scalar
    # pixel is broadcast against all three bounds, exactly as a per-pixel
    # comparison would do.
    if image.ndim == 2:
        image = image[..., np.newaxis]
    height, width = image.shape[:2]

    # Boolean (height, width) mask: True where every channel is in range.
    keyroom_mask = np.all((image >= lower) & (image <= upper), axis=-1)

    keyroom_positions = np.argwhere(keyroom_mask)
    if keyroom_positions.size == 0:
        return 0  # No keyroom detected, minimal reward

    # Centroid of the matching pixels as (row, col).
    keyroom_center = keyroom_positions.mean(axis=0)

    # The agent is taken to sit at the bottom-centre pixel of its view.
    agent_position = np.array([height - 1, width // 2])
    distance = np.linalg.norm(keyroom_center - agent_position)

    # Normalise by the image diagonal so the reward stays on a fixed scale.
    max_distance = np.linalg.norm(np.array([height - 1, width - 1]))
    if max_distance == 0:
        # 1x1 image: the only pixel is both the key room and the agent
        # position, so the distance is zero; avoid computing 0 / 0.
        return 1.0

    return 1 - (distance / max_distance)