# UI Detective: Measuring UX Impact via Consistency Analysis

This notebook shows how to use revealed preference metrics to evaluate UI/UX changes.

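**Key Insight**: A confusing UI leads users to make choices that are inconsistent with their own preferences — choices they later regret. We can quantify that inconsistency with two revealed-preference metrics: the Money Pump Index (MPI), where higher values indicate more confusion, and the Afriat Efficiency Index (AEI), where lower values indicate more confusion.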

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from pyrevealed import ConsumerSession, compute_aei, compute_mpi
from scipy import stats

## 1. The A/B Test Scenario

You've launched a new checkout flow. Conversion rate is the same, but customer satisfaction dropped. Why?

Hypothesis: The new UI is confusing users into making choices they don't actually want.

In [None]:
def simulate_user_choices(n_sessions=20, confusion_level=0.0):
    """
    Generate one synthetic user's purchase history.

    Every session offers three products -- [Standard, Premium, Enterprise] --
    at independently perturbed prices, and the user buys exactly one unit of
    exactly one of them.

    n_sessions: number of purchase sessions to simulate.
    confusion_level: standard deviation of the Gaussian noise added to the
        price-adjusted preferences (0.0 = clear UI, 1.0 = very confusing UI).

    Returns a ConsumerSession built from the (n_sessions, 3) price and
    quantity arrays.
    """
    # Underlying taste weights: the cheaper tiers are favoured.
    preference_weights = np.array([0.5, 0.35, 0.15])
    list_prices = np.array([10.0, 25.0, 50.0])
    product_count = 3

    session_prices = np.zeros((n_sessions, product_count))
    purchased = np.zeros((n_sessions, product_count))

    ui_is_confusing = confusion_level > 0

    for session_idx in range(n_sessions):
        # Sales/promos: each price lands within +/-20% of its list price.
        todays_prices = list_prices * np.random.uniform(0.8, 1.2, product_count)
        session_prices[session_idx] = todays_prices

        # A clear-headed user weighs preference against price.
        appeal = preference_weights / todays_prices

        # Confusion perturbs the appeal, scaled by the strongest option so the
        # noise is large enough to flip the ranking.
        if ui_is_confusing:
            jitter = np.random.normal(0, confusion_level, product_count)
            appeal = appeal + jitter * appeal.max()

        # Floor at a small positive value so every product keeps a valid,
        # non-zero selection probability, then normalise.
        appeal = np.maximum(appeal, 0.01)
        choice_probs = appeal / appeal.sum()

        # Exactly one product is bought per session.
        picked = np.random.choice(product_count, p=choice_probs)
        purchased[session_idx, picked] = 1

    return ConsumerSession(prices=session_prices, quantities=purchased)

In [None]:
# Experiment configuration: everything a reader might want to tune lives here.
SEED = 42
CONTROL_CONFUSION = 0.1  # old, clear UI
VARIANT_CONFUSION = 0.5  # new, confusing UI
n_users_per_group = 50
sessions_per_user = 15

# Seed the global NumPy RNG so the simulated data is reproducible.
np.random.seed(SEED)

# Simulate A/B test groups: one simulated purchase history per user.
# The control group is generated first, so the random stream is consumed in
# the same order on every run.
control_sessions = [
    simulate_user_choices(sessions_per_user, confusion_level=CONTROL_CONFUSION)
    for _ in range(n_users_per_group)
]

variant_sessions = [
    simulate_user_choices(sessions_per_user, confusion_level=VARIANT_CONFUSION)
    for _ in range(n_users_per_group)
]

print(f"Generated {n_users_per_group} users per group")
print(f"Each user has {sessions_per_user} purchase sessions")

## 2. Computing Confusion Metrics

In [None]:
# Calculate MPI for each user in each group.
# Each element of control_sessions / variant_sessions is one simulated user,
# so every list below holds one score per user.
# MPI = Money Pump Index; this notebook reads higher values as more confusion.
control_mpi = [compute_mpi(s).mpi_value for s in control_sessions]
variant_mpi = [compute_mpi(s).mpi_value for s in variant_sessions]

# AEI = Afriat Efficiency Index; this notebook reads lower values as more confusion.
control_aei = [compute_aei(s).efficiency_index for s in control_sessions]
variant_aei = [compute_aei(s).efficiency_index for s in variant_sessions]

# Summarize each group by the mean and standard deviation of its per-user scores.
# np.std is called with its defaults here (no ddof argument is passed).
print("Group Statistics:")
print("\nControl (Old UI):")
print(f"  Mean MPI: {np.mean(control_mpi):.4f} (std: {np.std(control_mpi):.4f})")
print(f"  Mean AEI: {np.mean(control_aei):.4f} (std: {np.std(control_aei):.4f})")

print("\nVariant (New UI):")
print(f"  Mean MPI: {np.mean(variant_mpi):.4f} (std: {np.std(variant_mpi):.4f})")
print(f"  Mean AEI: {np.mean(variant_aei):.4f} (std: {np.std(variant_aei):.4f})")

In [None]:
# Visualize
def _plot_group_histograms(ax, control_values, variant_values, xlabel, title, bins=20):
    """
    Overlay the control and variant distributions of one metric on `ax`.

    ax: matplotlib Axes to draw on.
    control_values, variant_values: per-user metric values for each group.
    xlabel, title: axis label and panel title.
    bins: number of equal-width bins shared by both groups.
    """
    # Derive ONE set of bin edges from the pooled data. Passing bins=20 to each
    # hist() call separately would compute edges from each group's own range,
    # so the overlaid bars would have different widths and positions and could
    # not be compared directly.
    bin_edges = np.histogram_bin_edges(
        np.concatenate([control_values, variant_values]), bins=bins
    )

    groups = (
        (control_values, 'Control (Old UI)', 'blue'),
        (variant_values, 'Variant (New UI)', 'red'),
    )
    for values, label, color in groups:
        ax.hist(values, bins=bin_edges, alpha=0.7, label=label, color=color)
        # Dashed vertical line marks the group mean.
        ax.axvline(np.mean(values), color=color, linestyle='--')

    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    ax.set_title(title)
    ax.legend()


fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# MPI Distribution
_plot_group_histograms(
    axes[0], control_mpi, variant_mpi,
    'Money Pump Index', 'MPI Distribution (Higher = More Confused)',
)

# AEI Distribution
_plot_group_histograms(
    axes[1], control_aei, variant_aei,
    'Afriat Efficiency Index', 'AEI Distribution (Lower = More Confused)',
)

plt.tight_layout()
plt.show()

## 3. Statistical Significance

In [None]:
# `stats` (scipy.stats) is imported in the notebook's top import cell.

# Independent two-sample t-test for the MPI difference between groups
t_stat_mpi, p_value_mpi = stats.ttest_ind(control_mpi, variant_mpi)

# Independent two-sample t-test for the AEI difference between groups
t_stat_aei, p_value_aei = stats.ttest_ind(control_aei, variant_aei)

print("Statistical Tests:")
print("\nMPI Difference:")
print(f"  t-statistic: {t_stat_mpi:.4f}")
print(f"  p-value: {p_value_mpi:.6f}")
print(f"  Significant at 0.05: {p_value_mpi < 0.05}")

print("\nAEI Difference:")
print(f"  t-statistic: {t_stat_aei:.4f}")
print(f"  p-value: {p_value_aei:.6f}")
print(f"  Significant at 0.05: {p_value_aei < 0.05}")

## 4. Business Recommendation

In [None]:
def generate_ab_test_report(control_mpi, variant_mpi, control_aei, variant_aei):
    """
    Print an A/B test report based on consistency metrics.

    control_mpi, variant_mpi: per-user Money Pump Index values for the control
        and variant groups.
    control_aei, variant_aei: per-user Afriat Efficiency Index values for the
        control and variant groups.

    The report shows the relative change of each group mean (variant vs.
    control) with the p-value of an independent two-sample t-test, followed by
    a recommendation. The recommendation is driven by MPI only:
      - increase > 50% and p < 0.05  -> ROLLBACK RECOMMENDED
      - increase > 20% and p < 0.05  -> INVESTIGATE FURTHER
      - otherwise                    -> PROCEED WITH CAUTION

    Returns None; the report is written to stdout.
    """
    mean_control_mpi = np.mean(control_mpi)
    mean_control_aei = np.mean(control_aei)

    # A perfectly consistent control group has a mean MPI of exactly 0, so the
    # relative change must not divide by the baseline blindly.
    mpi_increase = _percent_change(mean_control_mpi, np.mean(variant_mpi) - mean_control_mpi)
    aei_decrease = _percent_change(mean_control_aei, mean_control_aei - np.mean(variant_aei))

    _, p_mpi = stats.ttest_ind(control_mpi, variant_mpi)
    _, p_aei = stats.ttest_ind(control_aei, variant_aei)

    print("="*60)
    print("A/B TEST REPORT: UI Confusion Analysis")
    print("="*60)
    print()
    print("METRICS SUMMARY:")
    print(f"  Money Pump Index increased by {mpi_increase:.1f}% (p={p_mpi:.4f})")
    print(f"  Efficiency Index decreased by {aei_decrease:.1f}% (p={p_aei:.4f})")
    print()

    # A NaN p-value (e.g. zero variance in both groups) fails both `< 0.05`
    # checks and therefore falls through to the final branch.
    if mpi_increase > 50 and p_mpi < 0.05:
        recommendation = "ROLLBACK RECOMMENDED"
        explanation = (
            "The new UI significantly increases user confusion. "
            "Users are making choices inconsistent with their preferences, "
            "which will lead to higher return rates and lower satisfaction."
        )
    elif mpi_increase > 20 and p_mpi < 0.05:
        recommendation = "INVESTIGATE FURTHER"
        explanation = (
            "The new UI shows moderate increase in user confusion. "
            "Consider usability testing to identify specific pain points."
        )
    else:
        recommendation = "PROCEED WITH CAUTION"
        explanation = "No significant increase in user confusion detected."

    print(f"RECOMMENDATION: {recommendation}")
    print(f"\n{explanation}")
    print()
    print("="*60)


def _percent_change(baseline, delta):
    """Express `delta` as a percentage of `baseline`, tolerating a zero baseline."""
    if baseline == 0:
        # No finite percentage exists relative to zero: report an unbounded
        # change in the direction of delta, or 0% when nothing changed.
        if delta > 0:
            return float("inf")
        if delta < 0:
            return float("-inf")
        return 0.0 if delta == 0 else float("nan")
    return delta / baseline * 100

# Print the report for the per-user MPI/AEI scores of the two simulated groups.
generate_ab_test_report(control_mpi, variant_mpi, control_aei, variant_aei)

## Key Takeaways

1. **MPI as a confusion metric**: Higher MPI means users make more "mistakes" they'd want to undo

2. **Beyond conversion rate**: A confusing UI might maintain conversions but hurt satisfaction and returns

3. **Per-user analysis**: Revealed preference works on individual users, not just aggregates

4. **Statistical rigor**: Use proper tests to determine if differences are significant