<a href="https://colab.research.google.com/github/mahault/Self-explainability/blob/main/explainability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

class ExplainableActiveInference:
    """
    Implements an explainable AI agent using Active Inference principles.
    This model replaces static influence scoring with a hierarchical generative model.
    """

    def __init__(self, state_size):
        # Initialize priors and uncertainties
        self.state_size = state_size
        self.prior_influence = np.zeros(state_size)  # Influence scores for each state
        self.belief_uncertainty = np.ones(state_size)  # Higher means more uncertainty

    def infer_influence(self, selected_action, qs, B):
        """
        Infers influence of past states on action selection using Bayesian updates.
        Returns:
            influence_scores: Bayesian-updated influence estimates
            uncertainty_scores: Self-tracked uncertainty of influence
        """
        ACTIONABLE_ROW = 0
        action_taken = selected_action[ACTIONABLE_ROW]
        latest_beliefs = qs[ACTIONABLE_ROW]  # Belief distribution over states

        influence_scores = np.zeros(self.state_size)
        uncertainty_scores = np.zeros(self.state_size)

        for state_idx in range(self.state_size):
            belief = latest_beliefs[state_idx]  # Probability of being in this state

            # Probabilistic transition to next state
            if hasattr(B, '__getitem__'):
                next_state_prob = B[ACTIONABLE_ROW][state_idx]
            else:
                next_state_prob = np.eye(self.state_size)[state_idx]  # Identity transition if B is missing

            # Expected free energy contribution (Bayesian approach)
            influence_score = belief * np.mean(next_state_prob)

            # Bayesian Update of Prior Belief
            self.prior_influence[state_idx] = (self.prior_influence[state_idx] + influence_score) / 2

            # Update uncertainty: tracking variance of influence scores
            uncertainty_scores[state_idx] = np.std([self.prior_influence[state_idx], influence_score])

        self.belief_uncertainty = uncertainty_scores
        return self.prior_influence, uncertainty_scores

    def introspection_score(self):
        """
        Measures how well the AI understands its own decision influences.
        Returns:
            A score between 0 and 1 (higher = more self-aware)
        """
        mean_uncertainty = np.mean(self.belief_uncertainty)
        return 1 / (1 + mean_uncertainty)  # Higher score means lower uncertainty (better explainability)

    def explain_decision(self):
        """
        Generates a human-readable explanation of why the AI made its decision.
        Returns:
            A textual explanation based on influence and introspection scores.
        """
        max_influence_state = np.argmax(self.prior_influence)
        introspection_quality = self.introspection_score()

        explanation = (
            f"The AI chose its action primarily based on state {max_influence_state}, "
            f"which had the highest influence score ({self.prior_influence[max_influence_state]:.3f}). "
        )

        if introspection_quality > 0.8:
            explanation += "The AI is confident in its self-explanation."
        elif introspection_quality > 0.5:
            explanation += "The AI has moderate confidence in its explanation."
        else:
            explanation += "The AI has high uncertainty about why it made this decision."

        return explanation

# Example Usage
if __name__ == "__main__":
    state_size = 10  # Assume 10 possible states
    ai_system = ExplainableActiveInference(state_size)

    # Example action (4 possible actions)
    selected_action = np.array([0.1, 0.2, 0.5, 0.2])

    # Example belief state distribution
    qs = np.array([[0.05, 0.1, 0.3, 0.1, 0.15, 0.1, 0.05, 0.05, 0.05, 0.05]])

    # Example transition model (B-matrix)
    B = np.random.rand(1, state_size, state_size)  # Random state transitions

    # Perform Bayesian influence inference
    influence_scores, uncertainty_scores = ai_system.infer_influence(selected_action, qs, B)

    # Print explanation
    print(ai_system.explain_decision())


The AI chose its action primarily based on state 2, which had the highest influence score (0.049). The AI is confident in its self-explanation.


In [8]:
import numpy as np

class HierarchicalGenerativeModelAI:
    """
    Implements a hierarchical generative model for explainable AI.
    This model has three levels:
    1. Overt action layer: standard Active Inference.
    2. Covert policy selection layer: tracks influence of beliefs on actions.
    3. Meta-introspection layer: evaluates self-awareness of decision-making.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

        # **Level 1: Beliefs about states**
        self.qs = np.ones(state_size) / state_size  # Belief over states
        self.B = np.random.rand(state_size, state_size)  # Transition matrix
        self.A = np.random.rand(state_size, state_size)  # Likelihood matrix P(o|s)

        # **Level 2: Policy selection**
        self.qpi = np.ones(action_size) / action_size  # Belief over policies
        self.G = np.random.rand(action_size)  # Expected Free Energy (EFE)

        # **Level 3: Introspection layer**
        self.U = np.ones(state_size) / state_size  # Belief about self-awareness
        self.gamma = np.random.rand(state_size)  # Attentional weight for introspection

    def infer_states(self, observation):
        """
        Level 1: Bayesian state inference P(s_t | o_t).
        """
        likelihood = self.A[:, observation]  # P(o_t | s_t)
        self.qs = likelihood * self.qs  # Bayesian update
        self.qs /= np.sum(self.qs)  # Normalize

    def infer_policies(self):
        """
        Level 2: Infer best policy using Expected Free Energy (EFE).
        """
        expected_outcomes = self.B @ self.qs  # Predicted future states

        # Ensure probabilities sum to 1 (avoid division by zero)
        expected_outcomes = expected_outcomes / (np.sum(expected_outcomes) + 1e-6)

        # Compute Expected Free Energy (EFE) per action (assume B maps to actions)
        self.G = -np.sum(expected_outcomes * np.log(expected_outcomes + 1e-6))  # Still a scalar

        # Ensure `G` is a vector over `action_size`
        self.G = np.repeat(self.G, self.action_size)  # Expand into a vector

        # Convert EFE to policy probabilities
        self.qpi = np.exp(-self.G)  # Convert EFE scores to probabilities
        self.qpi /= np.sum(self.qpi)  # Normalize

        # Validate that qpi is properly normalized
        if not np.isclose(np.sum(self.qpi), 1.0, atol=1e-6):
            raise ValueError(f"qpi normalization failed, sum={np.sum(self.qpi)}")

        # Ensure `qpi` has the correct shape
        if self.qpi.shape[0] != self.action_size:
            raise ValueError(f"qpi must be a probability vector of size {self.action_size}, got shape {self.qpi.shape}")


    def infer_introspection(self):
        """
        Level 3: Meta-awareness of decision-making confidence.
        """
        self.U = 1 / (1 + np.std(self.qs))  # Self-awareness is inverse of uncertainty
        self.gamma = np.exp(-self.U)  # Attentional modulation based on introspection

    def select_action(self):
        """
        Sample an action from the inferred policy.
        """
        if len(self.qpi.shape) == 0 or len(self.qpi) != self.action_size:
            raise ValueError(f"qpi must be a probability vector of size {self.action_size}, got {self.qpi.shape}")

        action = np.random.choice(self.action_size, p=self.qpi)
        return action


    def explain_decision(self):
        """
        Generates a hierarchical explanation of the AI's decision.
        """
        chosen_state = np.argmax(self.qs)
        introspection_quality = np.mean(self.U)

        explanation = (
            f"Level 1: The AI believes it is in state {chosen_state} "
            f"with probability {self.qs[chosen_state]:.3f}.\n"
            f"Level 2: The selected policy has an expected free energy of {self.G.min():.3f}.\n"
            f"Level 3: The AI's introspection confidence is {introspection_quality:.3f}."
        )

        if introspection_quality > 0.8:
            explanation += " The AI is highly confident in its explanation."
        elif introspection_quality > 0.5:
            explanation += " The AI has moderate confidence in its explanation."
        else:
            explanation += " The AI has high uncertainty about its reasoning."

        return explanation

# Example Usage
if __name__ == "__main__":
    state_size = 5  # Assume 5 possible states
    action_size = 3  # Assume 3 possible actions

    ai_system = HierarchicalGenerativeModelAI(state_size, action_size)

    # Example observation (randomly chosen)
    observation = np.random.randint(0, state_size)

    # Perform Bayesian state inference
    ai_system.infer_states(observation)

    # Infer the best policy
    ai_system.infer_policies()

    # Perform meta-introspection
    ai_system.infer_introspection()

    # Select an action
    action = ai_system.select_action()

    # Explain the decision
    print(ai_system.explain_decision())


Level 1: The AI believes it is in state 2 with probability 0.246.
Level 2: The selected policy has an expected free energy of 1.542.
Level 3: The AI's introspection confidence is 0.943. The AI is highly confident in its explanation.


In [9]:
import numpy as np

class ExplainableActiveInference:
    """
    Implements an explainable AI agent using Active Inference principles.
    Now explicitly tracks belief updates and action selection reasoning.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

        # **Level 1: Beliefs about states**
        self.qs = np.ones(state_size) / state_size  # Belief over states
        self.B = np.random.rand(state_size, state_size)  # Transition matrix
        self.A = np.random.rand(state_size, state_size)  # Likelihood matrix P(o|s)

        # **Level 2: Policy selection**
        self.qpi = np.ones(action_size) / action_size  # Belief over policies
        self.G = np.zeros(action_size)  # Expected Free Energy (EFE)

        # **Level 3: Introspection layer**
        self.U = np.ones(state_size) / state_size  # Belief about self-awareness
        self.gamma = np.random.rand(state_size)  # Attentional weight for introspection

        # Storage for explainability
        self.belief_trace = []
        self.action_trace = []

    def infer_states(self, observation):
        """
        Level 1: Bayesian state inference P(s_t | o_t).
        Now tracks which observations contributed most to belief updates.
        """
        likelihood = self.A[:, observation]  # P(o_t | s_t)
        prior_belief = self.qs.copy()

        # Bayesian update
        self.qs = likelihood * self.qs
        self.qs /= np.sum(self.qs)  # Normalize

        # Identify which state changed the most
        belief_changes = np.abs(self.qs - prior_belief)
        most_updated_state = np.argmax(belief_changes)

        # Store trace of belief update
        self.belief_trace.append({
            "prior_belief": prior_belief,
            "updated_belief": self.qs,
            "most_updated_state": most_updated_state,
            "likelihood_used": likelihood
        })

    def infer_policies(self):
        """
        Level 2: Infer best policy using Expected Free Energy (EFE).
        Now explicitly tracks rejected alternative actions.
        """
        expected_outcomes = self.B @ self.qs  # Predicted future states

        # Compute Expected Free Energy (EFE) per action
        for action in range(self.action_size):
            # Simulate taking an action by considering expected transition
            future_state = expected_outcomes[action]
            self.G[action] = -np.sum(future_state * np.log(future_state + 1e-6))  # EFE

        # Convert EFE to policy probabilities
        self.qpi = np.exp(-self.G)
        self.qpi /= np.sum(self.qpi)

        # Store action reasoning trace
        sorted_actions = np.argsort(self.G)  # Rank actions by EFE (lower is better)
        self.action_trace.append({
            "EFE_values": self.G,
            "best_action": sorted_actions[0],
            "rejected_actions": sorted_actions[1:].tolist()
        })

    def select_action(self):
        """
        Selects an action based on the inferred policy.
        """
        action = np.random.choice(self.action_size, p=self.qpi)
        return action

    def explain_decision(self):
        """
        Generates a structured explanation including belief updates and action selection.
        """
        chosen_state = np.argmax(self.qs)
        introspection_quality = np.mean(self.U)

        # Get belief update reasoning
        belief_info = self.belief_trace[-1]
        action_info = self.action_trace[-1]

        explanation = (
            f"Level 1: The AI believes it is in state {chosen_state} "
            f"with probability {self.qs[chosen_state]:.3f}, based on observation likelihoods {belief_info['likelihood_used']}.\n"
            f"Most updated state: {belief_info['most_updated_state']}.\n"
            f"Level 2: The selected policy has an expected free energy of {self.G.min():.3f}.\n"
            f"Action Selection: The AI chose action {action_info['best_action']} "
            f"because it had the lowest EFE ({self.G[action_info['best_action']]:.3f}).\n"
            f"Rejected actions: {action_info['rejected_actions']} "
            f"with higher EFEs: {[self.G[a] for a in action_info['rejected_actions']]}.\n"
            f"Level 3: The AI's introspection confidence is {introspection_quality:.3f}."
        )

        return explanation


# Example Usage
if __name__ == "__main__":
    state_size = 5
    action_size = 3

    ai_system = ExplainableActiveInference(state_size, action_size)

    # Example observation
    observation = np.random.randint(0, state_size)

    # Perform Bayesian state inference
    ai_system.infer_states(observation)

    # Infer the best policy
    ai_system.infer_policies()

    # Select an action
    action = ai_system.select_action()

    # Explain the decision
    print(ai_system.explain_decision())



Level 1: The AI believes it is in state 2 with probability 0.656, based on observation likelihoods [0.10598477 0.28071784 0.90216317 0.0496934  0.03588424].
Most updated state: 2.
Level 2: The selected policy has an expected free energy of 0.306.
Action Selection: The AI chose action 1 because it had the lowest EFE (0.306).
Rejected actions: [0, 2] with higher EFEs: [0.358424395146506, 0.367870128218928].
Level 3: The AI's introspection confidence is 0.200.


In [14]:
import numpy as np

class ExplainableGenerativeAI:
    """
    Implements a full generative model for explainability.
    Now explicitly tracks:
    1. Likelihood updates (P(o|s))
    2. Variational Free Energy (VFE)
    3. Expected Free Energy (EFE) components (epistemic + pragmatic)
    4. How uncertainty propagates to introspection confidence
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

        # **Level 1: State beliefs**
        self.qs = np.ones(state_size) / state_size  # Belief over states
        self.B = np.random.rand(state_size, state_size)  # Transition matrix
        self.A = np.random.rand(state_size, state_size)  # Likelihood matrix P(o|s)

        # **Level 2: Policy selection**
        self.qpi = np.ones(action_size) / action_size  # Policy selection distribution
        self.G = np.zeros(action_size)  # Expected Free Energy (EFE)

        # **Level 3: Introspection layer**
        self.U = np.ones(state_size) / state_size  # Belief about self-awareness
        self.gamma = np.random.rand(state_size)  # Attentional weight for introspection

        # Tracking for explainability
        self.belief_trace = []
        self.action_trace = []
        self.vfe_trace = []

    def infer_states(self, observation):
        """
        Level 1: Bayesian inference over states.
        Now explicitly tracks likelihood updates and Variational Free Energy (VFE).
        """
        prior_belief = self.qs.copy()
        likelihood = self.A[:, observation]  # P(o_t | s_t)

        # Compute Variational Free Energy (VFE)
        vfe = -np.sum(self.qs * np.log(likelihood + 1e-6))

        # Bayesian update
        self.qs = likelihood * self.qs
        self.qs /= np.sum(self.qs)  # Normalize

        # Identify which state changed the most
        belief_changes = np.abs(self.qs - prior_belief)
        most_updated_state = np.argmax(belief_changes)

        # Store traces for explanation
        self.belief_trace.append({
            "prior_belief": prior_belief,
            "updated_belief": self.qs,
            "most_updated_state": most_updated_state,
            "likelihood_used": likelihood,
            "VFE": vfe
        })
        self.vfe_trace.append(vfe)

    def infer_policies(self):
        """
        Level 2: Infer best policy using Expected Free Energy (EFE).
        Now explicitly tracks epistemic and pragmatic contributions.
        """
        # Generate expected outcomes over actions, not states
        expected_outcomes = np.random.rand(self.action_size, self.state_size)  # Placeholder for action-state transitions

        # Ensure expected_outcomes is correctly shaped
        if expected_outcomes.ndim == 1:
            expected_outcomes = expected_outcomes.reshape(-1, 1)

        # Compute separate components of EFE for each action
        epistemic_value = -np.sum(expected_outcomes * np.log(expected_outcomes + 1e-6), axis=1)  # Over actions
        pragmatic_value = -np.sum(expected_outcomes * np.log(self.qs + 1e-6), axis=1)  # Over actions

        # Ensure both components are vectors of shape (action_size,)
        self.G = epistemic_value + pragmatic_value  # Full EFE

        # Convert EFE to policy probabilities
        self.qpi = np.exp(-self.G)
        self.qpi /= np.sum(self.qpi)

        # Ensure `qpi` is a vector of correct size
        if self.qpi.shape != (self.action_size,):
            raise ValueError(f"qpi must be a probability vector of size {self.action_size}, but got shape {self.qpi.shape}")

        # Store action reasoning trace
        sorted_actions = np.argsort(self.G)  # Rank actions by EFE (lower is better)
        self.action_trace.append({
            "EFE_values": self.G,
            "best_action": sorted_actions[0],
            "rejected_actions": sorted_actions[1:].tolist(),
            "epistemic_value": epistemic_value,
            "pragmatic_value": pragmatic_value
        })
    def select_action(self):
        """
        Selects an action based on the inferred policy.
        """
        action = np.random.choice(self.action_size, p=self.qpi)
        return action

    def explain_decision(self):
        """
        Generates a structured explanation including:
        - Variational Free Energy (VFE) for belief updates
        - Epistemic vs. pragmatic components of Expected Free Energy (EFE)
        - A causal breakdown of how decisions were made
        """
        chosen_state = np.argmax(self.qs)
        introspection_quality = np.mean(self.U)

        # Get belief update reasoning
        belief_info = self.belief_trace[-1]
        action_info = self.action_trace[-1]
        vfe = belief_info["VFE"]

        # Find key contributing factors to belief update
        prior_belief = belief_info["prior_belief"]
        updated_belief = belief_info["updated_belief"]
        belief_shift = updated_belief - prior_belief
        belief_shift_state = np.argmax(np.abs(belief_shift))

        # Find key contributing factors to EFE
        best_action = action_info["best_action"]
        rejected_actions = action_info["rejected_actions"]
        epistemic_value = action_info["epistemic_value"]
        pragmatic_value = action_info["pragmatic_value"]

        # Explanation for belief update
        belief_explanation = (
            f"Belief Update:\n"
            f"- The AI originally believed it was most likely in state {np.argmax(prior_belief)}, but observation "
            f"caused belief to shift towards state {chosen_state}.\n"
            f"- The state that changed the most was {belief_shift_state}, with a shift of {belief_shift[belief_shift_state]:.3f}.\n"
            f"- This change was driven by observation likelihoods {belief_info['likelihood_used']}.\n"
            f"- Variational Free Energy (VFE) was {vfe:.3f}, meaning uncertainty decreased by this amount."
        )

        # Explanation for action selection
        efe_explanation = (
            f"Action Selection:\n"
            f"- The AI chose action {best_action} because it had the lowest Expected Free Energy (EFE): {self.G[best_action]:.3f}.\n"
            f"- Epistemic Value (expected uncertainty reduction): {epistemic_value[best_action]:.3f}.\n"
            f"- Pragmatic Value (goal alignment): {pragmatic_value[best_action]:.3f}.\n"
            f"- Rejected actions: {rejected_actions} with higher EFEs: {[self.G[a] for a in rejected_actions]}.\n"
            f"- The AI rejected action {rejected_actions[0]} because it had a lower epistemic value ({epistemic_value[rejected_actions[0]]:.3f}) "
            f"and a less favorable goal alignment ({pragmatic_value[rejected_actions[0]]:.3f})."
        )

        # Explanation for confidence
        introspection_explanation = (
            f"Introspection:\n"
            f"- The AI's introspection confidence is {introspection_quality:.3f}.\n"
            f"- Confidence is lower because the belief update had high variance, suggesting multiple competing explanations for the observations."
        )

        return f"{belief_explanation}\n\n{efe_explanation}\n\n{introspection_explanation}"

# Running the model
state_size = 5
action_size = 3

ai_system = ExplainableGenerativeAI(state_size, action_size)

# Example observation
observation = np.random.randint(0, state_size)

# Perform Bayesian state inference
ai_system.infer_states(observation)

# Infer the best policy
ai_system.infer_policies()

# Select an action
action = ai_system.select_action()

# Explain the decision
explanation = ai_system.explain_decision()
explanation

"Belief Update:\n- The AI originally believed it was most likely in state 0, but observation caused belief to shift towards state 0.\n- The state that changed the most was 1, with a shift of -0.197.\n- This change was driven by observation likelihoods [0.94458641 0.00747353 0.86784213 0.69144075 0.17349219].\n- Variational Free Energy (VFE) was 1.443, meaning uncertainty decreased by this amount.\n\nAction Selection:\n- The AI chose action 2 because it had the lowest Expected Free Energy (EFE): 6.309.\n- Epistemic Value (expected uncertainty reduction): 0.919.\n- Pragmatic Value (goal alignment): 5.390.\n- Rejected actions: [0, 1] with higher EFEs: [7.173054832999318, 9.343246138265695].\n- The AI rejected action 0 because it had a lower epistemic value (1.705) and a less favorable goal alignment (5.468).\n\nIntrospection:\n- The AI's introspection confidence is 0.200.\n- Confidence is lower because the belief update had high variance, suggesting multiple competing explanations for the

In [15]:
import numpy as np

class ExplainableGenerativeAI:
    """
    Implements a full generative model for explainability.
    Now explicitly tracks:
    1. Likelihood updates (P(o|s))
    2. Variational Free Energy (VFE)
    3. Expected Free Energy (EFE) components (epistemic + pragmatic)
    4. How uncertainty propagates to introspection confidence
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

        # **Level 1: State beliefs**
        self.qs = np.ones(state_size) / state_size  # Belief over states
        self.B = np.random.rand(state_size, state_size)  # Transition matrix
        self.A = np.random.rand(state_size, state_size)  # Likelihood matrix P(o|s)

        # **Level 2: Policy selection**
        self.qpi = np.ones(action_size) / action_size  # Policy selection distribution
        self.G = np.zeros(action_size)  # Expected Free Energy (EFE)

        # **Level 3: Introspection layer**
        self.U = np.ones(state_size) / state_size  # Belief about self-awareness
        self.gamma = np.random.rand(state_size)  # Attentional weight for introspection

        # Tracking for explainability
        self.belief_trace = []
        self.action_trace = []
        self.vfe_trace = []

    def infer_states(self, observation):
        """
        Level 1: Bayesian inference over states.
        Now explicitly tracks likelihood updates and Variational Free Energy (VFE).
        """
        prior_belief = self.qs.copy()
        likelihood = self.A[:, observation]  # P(o_t | s_t)

        # Compute Variational Free Energy (VFE)
        vfe = -np.sum(self.qs * np.log(likelihood + 1e-6))

        # Bayesian update
        self.qs = likelihood * self.qs
        self.qs /= np.sum(self.qs)  # Normalize

        # Identify which state changed the most
        belief_changes = np.abs(self.qs - prior_belief)
        most_updated_state = np.argmax(belief_changes)

        # Store traces for explanation
        self.belief_trace.append({
            "prior_belief": prior_belief,
            "updated_belief": self.qs,
            "most_updated_state": most_updated_state,
            "likelihood_used": likelihood,
            "VFE": vfe
        })
        self.vfe_trace.append(vfe)

    def infer_policies(self):
        """
        Level 2: Infer best policy using Expected Free Energy (EFE).
        Now explicitly tracks epistemic and pragmatic contributions.
        """
        # Generate expected outcomes over actions, not states
        expected_outcomes = np.random.rand(self.action_size, self.state_size)  # Placeholder for action-state transitions

        # Ensure expected_outcomes is correctly shaped
        if expected_outcomes.ndim == 1:
            expected_outcomes = expected_outcomes.reshape(-1, 1)

        # Compute separate components of EFE for each action
        epistemic_value = -np.sum(expected_outcomes * np.log(expected_outcomes + 1e-6), axis=1)  # Over actions
        pragmatic_value = -np.sum(expected_outcomes * np.log(self.qs + 1e-6), axis=1)  # Over actions

        # Ensure both components are vectors of shape (action_size,)
        self.G = epistemic_value + pragmatic_value  # Full EFE

        # Convert EFE to policy probabilities
        self.qpi = np.exp(-self.G)
        self.qpi /= np.sum(self.qpi)

        # Ensure `qpi` is a vector of correct size
        if self.qpi.shape != (self.action_size,):
            raise ValueError(f"qpi must be a probability vector of size {self.action_size}, but got shape {self.qpi.shape}")

        # Store action reasoning trace
        sorted_actions = np.argsort(self.G)  # Rank actions by EFE (lower is better)
        self.action_trace.append({
            "EFE_values": self.G,
            "best_action": sorted_actions[0],
            "rejected_actions": sorted_actions[1:].tolist(),
            "epistemic_value": epistemic_value,
            "pragmatic_value": pragmatic_value
        })
    def select_action(self):
        """
        Selects an action based on the inferred policy.
        """
        action = np.random.choice(self.action_size, p=self.qpi)
        return action

    def explain_decision(self):
        """
        Generates a structured explanation including:
        - Variational Free Energy (VFE) for belief updates
        - Epistemic vs. pragmatic components of Expected Free Energy (EFE)
        - A causal breakdown of how decisions were made
        """
        chosen_state = np.argmax(self.qs)  # Most likely current state
        prior_state = np.argmax(self.belief_trace[-1]["prior_belief"])  # Most likely state before update
        introspection_quality = np.mean(self.U)

        # Get belief update reasoning
        belief_info = self.belief_trace[-1]
        action_info = self.action_trace[-1]
        vfe = belief_info["VFE"]

        # Find key contributing factors to belief update
        prior_belief = belief_info["prior_belief"]
        updated_belief = belief_info["updated_belief"]
        belief_shift = updated_belief - prior_belief
        belief_shift_state = np.argmax(np.abs(belief_shift))

        # Identify the likelihood that caused the biggest shift
        dominant_observation = np.argmax(belief_info["likelihood_used"])

        # Fix the belief update explanation
        if chosen_state == prior_state:
            belief_update_text = (
                f"The AI originally believed it was in state {prior_state}, and its belief remained the same after processing the observation. "
                f"However, the belief in state {belief_shift_state} changed significantly by {belief_shift[belief_shift_state]:.3f}, "
                f"due to the likelihood of observation {dominant_observation} being high ({belief_info['likelihood_used'][dominant_observation]:.3f})."
            )
        else:
            belief_update_text = (
                f"The AI originally believed it was in state {prior_state}, but observation led it to shift its belief towards state {chosen_state}. "
                f"The biggest belief shift occurred in state {belief_shift_state}, changing by {belief_shift[belief_shift_state]:.3f}, "
                f"driven by observation {dominant_observation} with likelihood {belief_info['likelihood_used'][dominant_observation]:.3f}."
            )

        # Explanation for action selection
        best_action = action_info["best_action"]
        rejected_actions = action_info["rejected_actions"]
        epistemic_value = action_info["epistemic_value"]
        pragmatic_value = action_info["pragmatic_value"]

        efe_explanation = (
            f"Action Selection:\n"
            f"- The AI chose action {best_action} because it had the lowest Expected Free Energy (EFE): {self.G[best_action]:.3f}.\n"
            f"- This means action {best_action} is expected to provide the best trade-off between gaining useful information and achieving goals.\n"
            f"- Epistemic Value (expected uncertainty reduction): {epistemic_value[best_action]:.3f}.\n"
            f"- Pragmatic Value (goal alignment): {pragmatic_value[best_action]:.3f}.\n"
            f"- The AI rejected action {rejected_actions[0]} because it had a lower epistemic value ({epistemic_value[rejected_actions[0]]:.3f}) "
            f"and a less favorable goal alignment ({pragmatic_value[rejected_actions[0]]:.3f})."
        )

        introspection_explanation = (
            f"Introspection:\n"
            f"- The AI's introspection confidence is {introspection_quality:.3f}.\n"
            f"- Confidence is lower because the belief update had competing evidence, meaning multiple interpretations were possible."
        )

        return f"{belief_update_text}\n\n{efe_explanation}\n\n{introspection_explanation}"

# Running the model
state_size = 5
action_size = 3

ai_system = ExplainableGenerativeAI(state_size, action_size)

# Example observation
observation = np.random.randint(0, state_size)

# Perform Bayesian state inference
ai_system.infer_states(observation)

# Infer the best policy
ai_system.infer_policies()

# Select an action
action = ai_system.select_action()

# Explain the decision
explanation = ai_system.explain_decision()
explanation

"The AI originally believed it was in state 0, but observation led it to shift its belief towards state 4. The biggest belief shift occurred in state 4, changing by 0.193, driven by observation 4 with likelihood 0.892.\n\nAction Selection:\n- The AI chose action 1 because it had the lowest Expected Free Energy (EFE): 5.111.\n- This means action 1 is expected to provide the best trade-off between gaining useful information and achieving goals.\n- Epistemic Value (expected uncertainty reduction): 1.239.\n- Pragmatic Value (goal alignment): 3.872.\n- The AI rejected action 2 because it had a lower epistemic value (1.327) and a less favorable goal alignment (5.660).\n\nIntrospection:\n- The AI's introspection confidence is 0.200.\n- Confidence is lower because the belief update had competing evidence, meaning multiple interpretations were possible."