In [10]:
import numpy as np

class LinearRewardFunctionOnUnitSphere:
    """Linear reward ``w . s`` whose weight vector is rescaled to unit L2 norm."""

    def __init__(self, weights):
        """
        Store the weights rescaled so that the weight vector lies on the unit sphere.

        :param weights: List or array of weights [w_x, w_y, w_obstacle, w_fragile]
        :raises ValueError: If the weights have a zero or non-finite Euclidean
                            norm (e.g. all zeros, empty, or containing NaN/inf),
                            since such a vector cannot be projected onto the
                            unit sphere.
        """
        weight_vector = np.asarray(weights, dtype=float)
        norm = np.linalg.norm(weight_vector)
        # Dividing by a zero/NaN/inf norm would silently yield NaN or zero
        # weights and poison every reward computed afterwards, so fail loudly.
        if not np.isfinite(norm) or norm == 0.0:
            raise ValueError(
                "weights must have a finite, non-zero Euclidean norm to be "
                f"normalized onto the unit sphere (got norm={norm})"
            )
        self.weights = weight_vector / norm

    def compute_reward(self, state):
        """
        Compute the reward for a given state as the dot product of the
        unit-norm weight vector with the state vector.

        Only the weights are normalized; the state is used as given, so the
        returned reward is not itself bounded to [-1, 1].

        :param state: Tuple (x, y, obstacle, fragile)
                      - x, y: coordinates (continuous or discrete)
                      - obstacle: 1 if obstacle present, 0 otherwise
                      - fragile: 1 if fragile object is present, 0 otherwise
        :return: Computed reward value (dot product of weights and state)
        """
        state_vector = np.asarray(state)
        return np.dot(self.weights, state_vector)

# Example usage: score four sample states with one weight vector.
# Weight order is [w_x, w_y, w_obstacle, w_fragile]; the constructor rescales
# the vector to unit L2 norm, so only the ratios between these values matter.
weights = [1.0, 1.0, 5.0, 10.0]  # Example weights (using costs, so positive values)
linear_reward_function = LinearRewardFunctionOnUnitSphere(weights)

# Each state is (x, y, obstacle, fragile). The position is held at (3, 2) so
# the four cases differ only in the obstacle / fragile indicator flags.
states = [
    (3, 2, 1, 0),  # Obstacle present, no fragile
    (3, 2, 0, 1),  # Fragile present, no obstacle
    (3, 2, 1, 1),  # Both obstacle and fragile present
    (3, 2, 0, 0)   # No obstacle, no fragile
]

# Print the reward (dot product of normalized weights and state) for each case.
for state in states:
    reward = linear_reward_function.compute_reward(state)
    print(f"Linear Reward for state {state}: {reward}")


Linear Reward for state (3, 2, 1, 0): 0.8873565094161138
Linear Reward for state (3, 2, 0, 1): 1.3310347641241709
Linear Reward for state (3, 2, 1, 1): 1.7747130188322275
Linear Reward for state (3, 2, 0, 0): 0.4436782547080569
