# Bot Hunter: Detecting Automated Behavior via Consistency Analysis

This notebook demonstrates how to use revealed preference theory to detect bots and automated scripts.

**Key Insight**: Humans typically make consistent choices and therefore score a high Afriat Efficiency Index (AEI). Bots that click randomly or follow hard-coded rules often fail consistency tests and score lower.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pyrevealed import ConsumerSession, compute_aei, check_garp
from pyrevealed.viz import plot_aei_distribution

## 1. Simulating Human vs Bot Behavior

Let's simulate:
- **Human users**: Make choices that roughly maximize utility
- **Bot users**: Make random or scripted choices (the simulation below covers the purely random case)

In [None]:
def generate_rational_user(n_observations=20, n_goods=3, noise=0.1, budget=10.0, rng=None):
    """
    Generate data for a roughly rational user.

    At every observation the user spends the whole budget, with spending
    shares proportional to ``1 / price`` plus Gaussian noise, so cheaper
    goods attract more spending. With ``noise=0`` the resulting demand is
    ``budget * p_i**-2 / sum_j(p_j**-1)``, i.e. CES demand with elasticity
    of substitution 2.

    Args:
        n_observations: Number of price/quantity observations to simulate.
        n_goods: Number of goods per observation.
        noise: Standard deviation of the Gaussian noise added to the raw
            preference weights before they are normalised.
        budget: Total amount spent in each observation.
        rng: Optional random source exposing ``uniform`` and ``normal``
            (e.g. ``np.random.default_rng(seed)``). When omitted, the global
            ``np.random`` state is used, so ``np.random.seed`` still applies.

    Returns:
        ConsumerSession built from ``prices`` and ``quantities``, both of
        shape ``(n_observations, n_goods)``.
    """
    rng = np.random if rng is None else rng
    shape = (n_observations, n_goods)

    # Prices are drawn first, then the noise, matching the draw order of a
    # row-by-row simulation.
    prices = rng.uniform(0.5, 2.0, shape)

    # Prefer goods with lower prices (inverse relationship). The 0.1 floor
    # keeps every weight strictly positive even when the noise is large.
    weights = np.maximum(1.0 / prices + rng.normal(0, noise, shape), 0.1)
    shares = weights / weights.sum(axis=1, keepdims=True)

    # Allocate the budget according to the shares and convert to quantities.
    quantities = shares * budget / prices

    return ConsumerSession(prices=prices, quantities=quantities)


def generate_random_bot(n_observations=20, n_goods=3, max_quantity=5.0, rng=None):
    """
    Generate data for a bot that clicks randomly.

    Quantities are drawn independently of prices, so the simulated choices
    carry no price response at all.

    Args:
        n_observations: Number of price/quantity observations to simulate.
        n_goods: Number of goods per observation.
        max_quantity: Upper bound of the uniform quantity draw (lower bound
            is 0).
        rng: Optional random source exposing ``uniform`` (e.g.
            ``np.random.default_rng(seed)``). When omitted, the global
            ``np.random`` state is used, so ``np.random.seed`` still applies.

    Returns:
        ConsumerSession built from ``prices`` and ``quantities``, both of
        shape ``(n_observations, n_goods)``.
    """
    rng = np.random if rng is None else rng
    shape = (n_observations, n_goods)

    # Prices first, then quantities: the draw order is part of reproducibility.
    prices = rng.uniform(0.5, 2.0, shape)
    quantities = rng.uniform(0, max_quantity, shape)

    return ConsumerSession(prices=prices, quantities=quantities)

In [None]:
# Generate sample users
np.random.seed(42)  # fix the global seed so a re-run draws the same sessions

# Evaluation order matters for reproducibility: the human session consumes
# random draws before the bot session does.
human_user, bot_user = (
    generate_rational_user(n_observations=30),
    generate_random_bot(n_observations=30),
)

print(
    "Human user sample:",
    f"  Prices shape: {human_user.prices.shape}",
    f"  Quantities shape: {human_user.quantities.shape}",
    sep="\n",
)

## 2. Analyzing Consistency Scores

In [None]:
# Compute AEI for both (human first, then bot)
human_aei, bot_aei = (compute_aei(session) for session in (human_user, bot_user))

# One print call; the empty string yields the blank separator line.
print(
    "Consistency Analysis:",
    f"  Human AEI: {human_aei.efficiency_index:.4f}",
    f"  Bot AEI:   {bot_aei.efficiency_index:.4f}",
    "",
    f"  Human is perfectly consistent: {human_aei.is_perfectly_consistent}",
    f"  Bot is perfectly consistent:   {bot_aei.is_perfectly_consistent}",
    sep="\n",
)

## 3. Population Analysis

Let's simulate a population with mixed humans and bots.

In [None]:
# Generate population
n_humans = 80
n_bots = 20

print("Generating population...")

# All humans are simulated before any bot, so the random stream is consumed
# in a fixed order.
human_scores = [
    compute_aei(generate_rational_user(n_observations=25, noise=0.2)).efficiency_index
    for _ in range(n_humans)
]
bot_scores = [
    compute_aei(generate_random_bot(n_observations=25)).efficiency_index
    for _ in range(n_bots)
]

print(f"\nHuman scores - Mean: {np.mean(human_scores):.3f}, Std: {np.std(human_scores):.3f}")
print(f"Bot scores   - Mean: {np.mean(bot_scores):.3f}, Std: {np.std(bot_scores):.3f}")

In [None]:
# Visualize distributions
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

all_scores = human_scores + bot_scores

# Use one set of bin edges for both groups. Passing bins=20 to each hist()
# call separately lets every group derive edges from its own data range, so
# bars of different widths end up overlaid and cannot be compared.
shared_bins = np.histogram_bin_edges(all_scores, bins=20)

axes[0].hist(human_scores, bins=shared_bins, alpha=0.7, label='Humans', color='blue')
axes[0].hist(bot_scores, bins=shared_bins, alpha=0.7, label='Bots', color='red')
# 0.85 is the same cut-off that detect_bot uses as its default threshold.
axes[0].axvline(0.85, color='green', linestyle='--', label='Detection Threshold')
axes[0].set_xlabel('AEI Score')
axes[0].set_ylabel('Count')
axes[0].set_title('AEI Distribution: Humans vs Bots')
axes[0].legend()

# Combined distribution
plot_aei_distribution(all_scores, ax=axes[1])

plt.tight_layout()
plt.show()

## 4. Bot Detection Pipeline

In [None]:
def detect_bot(session, threshold=0.85):
    """
    Flag a session as a likely bot when its AEI falls below ``threshold``.

    Args:
        session: Session object passed straight to ``compute_aei``.
        threshold: AEI cut-off; scores strictly below it are flagged.

    Returns:
        dict with four keys:
            is_bot: True when the AEI score is below ``threshold``.
            confidence: Absolute distance between the score and ``threshold``.
            score: The AEI score itself.
            is_perfect: ``is_perfectly_consistent`` flag of the AEI result.
    """
    outcome = compute_aei(session)
    aei = outcome.efficiency_index

    # A score exactly at the threshold is NOT flagged (strict comparison).
    return {
        'is_bot': aei < threshold,
        'confidence': abs(aei - threshold),
        'score': aei,
        'is_perfect': outcome.is_perfectly_consistent,
    }

# Test on our examples (human first, then bot)
human_result, bot_result = (detect_bot(session) for session in (human_user, bot_user))

print(
    "Detection Results:",
    f"\nHuman user: {human_result}",
    f"Bot user:   {bot_result}",
    sep="\n",
)

## 5. Detection Accuracy Analysis

In [None]:
# Calculate detection metrics
threshold = 0.85

# Bots are the positive class; "flagged" means a score strictly below threshold.
# Bots: correctly flagged (TP) vs. missed (FN)
true_positives = len([s for s in bot_scores if s < threshold])
false_negatives = len([s for s in bot_scores if s >= threshold])
# Humans: correctly cleared (TN) vs. wrongly flagged (FP)
true_negatives = len([s for s in human_scores if s >= threshold])
false_positives = len([s for s in human_scores if s < threshold])

# Name the denominators once; each ratio falls back to 0 when it is empty.
flagged_total = true_positives + false_positives
actual_bots = true_positives + false_negatives
precision = true_positives / flagged_total if flagged_total > 0 else 0
recall = true_positives / actual_bots if actual_bots > 0 else 0

pr_sum = precision + recall
f1 = 2 * precision * recall / pr_sum if pr_sum > 0 else 0

print(
    f"Detection Metrics (threshold={threshold}):",
    f"  Precision: {precision:.2%}",
    f"  Recall:    {recall:.2%}",
    f"  F1 Score:  {f1:.2%}",
    "",
    "Confusion Matrix:",
    f"  True Positives:  {true_positives}",
    f"  False Positives: {false_positives}",
    f"  True Negatives:  {true_negatives}",
    f"  False Negatives: {false_negatives}",
    sep="\n",
)

## Key Takeaways

1. **Random bots have low AEI scores** because their choices don't follow any consistent preference pattern

2. **Humans have high AEI scores** even with noise, because they roughly maximize utility

3. **Threshold tuning** depends on your tolerance for false positives vs false negatives

4. **This complements ML approaches** - use AEI as a feature in your fraud detection pipeline