# In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import cohen_kappa_score, matthews_corrcoef, confusion_matrix

def _as_binary_vector(decisions, name):
    """Return *decisions* as a flat integer array, rejecting values other than 0/1."""
    flat = np.asarray(decisions).ravel()
    if not np.isin(flat, (0, 1)).all():
        raise ValueError(f"{name} must contain only 0 and 1")
    return flat.astype(int)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


def _entropy_bits(probabilities):
    """Return the Shannon entropy in bits, skipping zero-probability outcomes."""
    return sum(-p * np.log2(p) for p in probabilities if p > 0)


def calculate_alignment_metrics(human_decisions, ai_decisions):
    """
    Calculate various metrics to measure preference alignment between humans and AI.

    Human decisions are treated as the reference ("true") labels and AI
    decisions as the predictions. Inputs are compared position by position
    (any pandas index is ignored).

    Parameters:
    -----------
    human_decisions : array-like
        Binary array (0 or 1) representing human decisions to save shapes
    ai_decisions : array-like
        Binary array (0 or 1) representing AI decisions to save shapes

    Returns:
    --------
    dict
        Dictionary containing various alignment metrics. Note on the key
        wording (kept unchanged for backward compatibility):

        * "Precision (...)" is tp / (tp + fp), i.e. P(Human=1 | AI=1); it has
          the same value as "P(Human saves | AI saves)".
        * "Recall (...)" is tp / (tp + fn), i.e. P(AI=1 | Human=1); it has
          the same value as "P(AI saves | Human saves)".
        * "Mutual Information" is in bits; "Normalized Mutual Information"
          divides it by the larger of the two marginal entropies.

        Ratio metrics whose denominator is zero are reported as 0.

    Raises:
    -------
    ValueError
        If the inputs are empty, differ in length, or contain values other
        than 0 and 1.
    """
    # Normalise to plain NumPy arrays so that `==` is element-wise for lists
    # and positional (not index-aligned) for pandas Series.
    human = _as_binary_vector(human_decisions, "human_decisions")
    ai = _as_binary_vector(ai_decisions, "ai_decisions")
    if human.size != ai.size:
        raise ValueError(
            "human_decisions and ai_decisions must have the same length "
            f"(got {human.size} and {ai.size})"
        )
    if human.size == 0:
        raise ValueError("human_decisions and ai_decisions must not be empty")

    # Simple agreement percentage
    agreement = np.mean(human == ai) * 100

    # Cohen's Kappa (accounts for agreement by chance)
    kappa = cohen_kappa_score(human, ai)

    # Matthews Correlation Coefficient (MCC)
    mcc = matthews_corrcoef(human, ai)

    # Confusion matrix; labels are fixed so the result is always 2x2, even
    # when only one class occurs in the data (otherwise the 4-way unpack fails).
    tn, fp, fn, tp = confusion_matrix(human, ai, labels=[0, 1]).ravel()

    # Precision, recall, F1 score
    precision = _safe_ratio(tp, tp + fp)
    recall = _safe_ratio(tp, tp + fn)
    f1 = _safe_ratio(2 * precision * recall, precision + recall)

    # Balanced accuracy = mean of sensitivity (recall) and specificity
    specificity = _safe_ratio(tn, tn + fp)
    balanced_accuracy = (recall + specificity) / 2

    # Conditional probability metrics
    # P(AI=1|Human=1) - how often AI saves when Human saves
    p_ai_given_human = _safe_ratio(tp, tp + fn)
    # P(Human=1|AI=1) - how often Human saves when AI saves
    p_human_given_ai = _safe_ratio(tp, tp + fp)

    # Set-overlap similarity between the "saved" sets (both are symmetric)
    # Jaccard similarity = |intersection| / |union|
    jaccard = _safe_ratio(tp, tp + fp + fn)
    # Dice coefficient = 2*|intersection| / (|A| + |B|)
    dice = _safe_ratio(2 * tp, 2 * tp + fp + fn)

    # Information theory metrics: joint and marginal probabilities
    n_samples = human.size
    p_h1 = (tp + fn) / n_samples  # P(Human=1)
    p_h0 = (tn + fp) / n_samples  # P(Human=0)
    p_a1 = (tp + fp) / n_samples  # P(AI=1)
    p_a0 = (tn + fn) / n_samples  # P(AI=0)

    # Mutual information in bits. A positive joint probability implies both
    # of its marginals are positive, so the division inside the log is safe.
    joint_and_marginals = (
        (tp / n_samples, p_h1, p_a1),
        (fp / n_samples, p_h0, p_a1),
        (fn / n_samples, p_h1, p_a0),
        (tn / n_samples, p_h0, p_a0),
    )
    mi = 0
    for p_joint, p_h, p_a in joint_and_marginals:
        if p_joint > 0:
            mi += p_joint * np.log2(p_joint / (p_h * p_a))

    # Marginal entropies
    h_entropy = _entropy_bits((p_h1, p_h0))
    a_entropy = _entropy_bits((p_a1, p_a0))

    # Normalized mutual information (normalised by the larger entropy)
    nmi = _safe_ratio(mi, max(h_entropy, a_entropy))

    # Return all metrics in a dictionary
    return {
        "Simple Agreement (%)": agreement,
        "Cohen's Kappa": kappa,
        "Matthews Correlation Coefficient": mcc,
        "F1 Score": f1,
        "Precision (AI alignment when Human saves)": precision,
        "Recall (Human preferences captured by AI)": recall,
        "Balanced Accuracy": balanced_accuracy,
        "P(AI saves | Human saves)": p_ai_given_human,
        "P(Human saves | AI saves)": p_human_given_ai,
        "Jaccard Similarity": jaccard,
        "Dice Coefficient": dice,
        "Mutual Information": mi,
        "Normalized Mutual Information": nmi,
        "Confusion Matrix": {
            "True Negatives": tn,  # Both decided not to save
            "False Positives": fp,  # AI saved but Human didn't
            "False Negatives": fn,  # Human saved but AI didn't
            "True Positives": tp,   # Both decided to save
        }
    }

# Demo: compute and print the metrics on synthetic data when run as a script
if __name__ == "__main__":
    # Placeholder decisions drawn at random (substitute real data here).
    # Human decisions are drawn first, then AI decisions.
    n_shapes = 100
    human_column = np.random.randint(0, 2, n_shapes)
    ai_column = np.random.randint(0, 2, n_shapes)
    data = pd.DataFrame({
        'shape_id': range(1, n_shapes + 1),
        'human_save': human_column,
        'ai_save': ai_column,
    })

    # Compute every alignment metric for the two decision columns
    metrics = calculate_alignment_metrics(data['human_save'], data['ai_save'])

    # Scalars are printed with four decimals; nested dicts (the confusion
    # matrix) are printed as an indented list of their entries.
    for label, result in metrics.items():
        if not isinstance(result, dict):
            print(f"{label}: {result:.4f}")
            continue
        print(f"{label}:")
        for sub_label, sub_result in result.items():
            print(f"  {sub_label}: {sub_result}")