<a href="https://colab.research.google.com/github/mahault/Self-explainability/blob/main/explainability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

class ExplainableActiveInference:
    """
    Explainable agent that keeps a running per-state "influence" estimate.

    Each call to `infer_influence` scores every state from the current belief
    vector and the transition model, blends that score into a stored prior,
    and records how far the blended prior sits from the newest score. The
    stored values drive `introspection_score` and `explain_decision`.

    Attributes:
        state_size: Number of states tracked.
        prior_influence: Array of length `state_size` with the running
            influence estimate for each state (starts at zero).
        belief_uncertainty: Array of length `state_size` with the spread
            recorded by the most recent `infer_influence` call (starts at
            one, so the introspection score is 0.5 before any inference).
    """

    # Only the first row/factor of `qs` and `B` is inspected.
    _ACTIONABLE_ROW = 0

    def __init__(self, state_size):
        """
        Create an agent with zero influence priors and unit uncertainty.

        Args:
            state_size: Number of states to track.
        """
        self.state_size = state_size
        self.prior_influence = np.zeros(state_size)  # Influence scores for each state
        self.belief_uncertainty = np.ones(state_size)  # Higher means more uncertainty

    def _mean_transition_mass(self, B):
        """Return, for each state, the mean of that state's entry in the first row of B."""
        if not hasattr(B, '__getitem__'):
            # No usable transition model: fall back to identity transitions.
            # The mean of a one-hot row of length state_size is 1 / state_size.
            return np.full(self.state_size, 1.0 / self.state_size)

        transitions = B[self._ACTIONABLE_ROW]
        # Averaged entry by entry so nested lists and arrays with extra
        # trailing dimensions are treated exactly like plain 2-D matrices.
        return np.array(
            [np.mean(transitions[idx]) for idx in range(self.state_size)],
            dtype=float,
        )

    def infer_influence(self, selected_action, qs, B):
        """
        Update the running influence estimate of every state.

        For each state the new score is `belief * mean(B[0][state])`. The
        stored prior becomes the average of the previous prior and that
        score, and the uncertainty is the standard deviation of the pair
        (updated prior, new score).

        Args:
            selected_action: Accepted for interface compatibility; it does
                not affect the computed scores.
            qs: Indexable whose first element is a 1-D belief vector with at
                least `state_size` entries; extra entries are ignored.
            B: Indexable transition model whose first element can be indexed
                by state. Anything that is not indexable (e.g. None) is
                treated as an identity transition.

        Returns:
            influence_scores: Copy of the updated running influence estimates.
            uncertainty_scores: Copy of the per-state uncertainty.

        Raises:
            ValueError: If the first element of `qs` is not one-dimensional.
            IndexError: If it has fewer than `state_size` entries.
        """
        beliefs = np.asarray(qs[self._ACTIONABLE_ROW], dtype=float)
        if beliefs.ndim != 1:
            raise ValueError("qs[0] must be a one-dimensional belief vector")
        if beliefs.shape[0] < self.state_size:
            raise IndexError(
                f"qs[0] has {beliefs.shape[0]} entries but state_size is {self.state_size}"
            )
        beliefs = beliefs[:self.state_size]

        new_scores = beliefs * self._mean_transition_mass(B)

        # Blend the previous prior with the new score in equal parts.
        updated_prior = (self.prior_influence + new_scores) / 2

        # Spread between the blended prior and the newest score, per state.
        uncertainty_scores = np.std(np.stack([updated_prior, new_scores]), axis=0)

        self.prior_influence = updated_prior
        self.belief_uncertainty = uncertainty_scores

        # Hand back copies so that neither later calls nor caller-side edits
        # can change results the caller already holds or the agent's state.
        return updated_prior.copy(), uncertainty_scores.copy()

    def introspection_score(self):
        """
        Summarise the stored uncertainty as a single confidence value.

        Returns:
            `1 / (1 + mean(belief_uncertainty))`; larger values correspond
            to lower stored uncertainty. It is 0.5 before any inference.
        """
        mean_uncertainty = np.mean(self.belief_uncertainty)
        return 1 / (1 + mean_uncertainty)

    def explain_decision(self):
        """
        Build a one-paragraph textual explanation of the current state.

        Returns:
            A string naming the state with the largest stored influence and
            its score, followed by a confidence sentence chosen from the
            introspection score (> 0.8, > 0.5, otherwise).
        """
        max_influence_state = np.argmax(self.prior_influence)
        introspection_quality = self.introspection_score()

        explanation = (
            f"The AI chose its action primarily based on state {max_influence_state}, "
            f"which had the highest influence score ({self.prior_influence[max_influence_state]:.3f}). "
        )

        if introspection_quality > 0.8:
            explanation += "The AI is confident in its self-explanation."
        elif introspection_quality > 0.5:
            explanation += "The AI has moderate confidence in its explanation."
        else:
            explanation += "The AI has high uncertainty about why it made this decision."

        return explanation

# Demonstration: one influence-inference pass on toy inputs
if __name__ == "__main__":
    # Toy action vector with four entries
    selected_action = np.array([0.1, 0.2, 0.5, 0.2])

    # A single row of beliefs spread over the ten states
    qs = np.array([[0.05, 0.1, 0.3, 0.1, 0.15, 0.1, 0.05, 0.05, 0.05, 0.05]])

    # The agent tracks ten states, matching the width of the belief row
    state_size = 10
    ai_system = ExplainableActiveInference(state_size)

    # Stand-in B tensor filled with uniform random draws, so the printed
    # explanation varies from run to run
    B = np.random.rand(1, state_size, state_size)

    # Update the agent's influence estimates from the toy inputs
    influence_scores, uncertainty_scores = ai_system.infer_influence(selected_action, qs, B)

    # Report the resulting explanation
    explanation = ai_system.explain_decision()
    print(explanation)


The AI chose its action primarily based on state 2, which had the highest influence score (0.049). The AI is confident in its self-explanation.
