**Bellman Update Function**

In [1]:
def bellman_update_basic(V, state, transitions, rewards, discount_factor):
    """
    Compute the Bellman optimality update for a given state in an MDP.

    For every action available in `state`, the expected value
    sum_{s'} P(s' | state, action) * (R(state, action) + gamma * V[s'])
    is computed, and the largest of these is returned.

    Parameters:
    - V: A dictionary representing the value function. Keys are states, and values are the value estimates.
    - state: The state for which the Bellman update is computed.
    - transitions: A nested dictionary of transition probabilities, indexed as
      transitions[state][action]. Each entry is a list of (next_state, probability) pairs.
    - rewards: A dictionary representing the rewards. Keys are (state, action) pairs, and values are the rewards.
    - discount_factor: The discount factor (gamma), a float in [0, 1).

    Returns:
    - The updated value for the given state. If `transitions[state]` contains no
      actions, float('-inf') is returned.
    """
    best_value = float('-inf')

    for action, outcomes in transitions[state].items():
        # The reward depends only on (state, action), so look it up once
        # per action instead of once per successor state.
        reward = rewards[(state, action)]
        action_value = sum(
            prob * (reward + discount_factor * V[next_state])
            for next_state, prob in outcomes
        )
        best_value = max(best_value, action_value)

    return best_value


**Bellman Update with Detailed Decomposition**

In [None]:
def bellman_update_decomposed(V, state, transitions, rewards, discount_factor):
    """
    Compute the Bellman update for a state, delegating the per-action work to a helper.

    Each action found in `transitions[state]` is scored with
    `compute_expected_value`, and the best score is returned.

    Parameters:
    - V: A dictionary representing the value function.
    - state: The state for which the Bellman update is computed.
    - transitions: Transition probabilities, indexed as transitions[state][action].
    - rewards: A dictionary of rewards, passed through to `compute_expected_value`.
    - discount_factor: The discount factor (gamma).

    Returns:
    - Updated value for the given state (the maximum over all action values).

    Raises:
    - ValueError: if `transitions[state]` contains no actions (max of an empty list).
    """
    # One candidate value per available action.
    candidates = [
        compute_expected_value(V, state, action, transitions, rewards, discount_factor)
        for action in transitions[state]
    ]
    return max(candidates)


def compute_expected_value(V, state, action, transitions, rewards, discount_factor):
    """
    Compute the expected value of taking `action` in `state`.

    The result is the probability-weighted sum, over every
    (next_state, probability) pair in `transitions[state][action]`, of the
    immediate reward plus the discounted value of the next state.

    Parameters:
    - V: The value function, indexed by state.
    - state: The current state.
    - action: The action taken.
    - transitions: Transition probabilities, indexed as transitions[state][action];
      each entry is an iterable of (next_state, probability) pairs.
    - rewards: Rewards, indexed by (state, action) tuples.
    - discount_factor: The discount factor (gamma).

    Returns:
    - The expected value for the state-action pair (0 if there are no outcomes).
    """
    outcomes = transitions[state][action]
    return sum(
        p * (rewards[(state, action)] + discount_factor * V[successor])
        for successor, p in outcomes
    )


**Bellman Update with Numpy**

In [None]:
import numpy as np

def bellman_update_numpy(V, state, transitions, rewards, discount_factor):
    """
    Compute the Bellman update for a given state in an MDP using numpy.

    Parameters:
    - V: A 1-D numpy array representing the value function, indexed by state.
    - state: The index of the state for which the Bellman update is computed.
    - transitions: A list of lists, indexed as transitions[state][action]. Each entry
      is a list of (next_state, probability) pairs.
    - rewards: A numpy array of rewards, indexed as rewards[state, action].
    - discount_factor: The discount factor (gamma).

    Returns:
    - The updated value for the given state (the maximum over all action values).
    """
    actions = transitions[state]
    action_values = np.zeros(len(actions))

    for action, outcomes in enumerate(actions):
        next_states, probs = zip(*outcomes)
        # zip() yields a tuple, and NumPy treats V[(i, j)] as a
        # multi-dimensional index rather than "elements i and j". Converting
        # to an array selects one value per successor state.
        successor_idx = np.asarray(next_states)
        future_rewards = discount_factor * V[successor_idx]
        action_values[action] = np.dot(probs, rewards[state, action] + future_rewards)

    return np.max(action_values)
