# Cost-Benefit Analysis: Adversarial Fraud Detection

This notebook runs a Monte Carlo simulation to quantify the business value of adversarial robustness testing.

## Key Questions
1. What is the expected loss from adversarial attacks on our fraud detection system?
2. How much does improving model robustness reduce expected losses?
3. What is the ROI of red team testing?

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Set style: apply the 'seaborn-v0_8-whitegrid' style sheet and the 'husl'
# colour palette to every figure drawn later in the notebook.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Seed NumPy's global RNG. The simulation code below draws from it through
# np.random.poisson / binomial / lognormal, so the numbers are reproducible
# only when the cells are executed top to bottom on a fresh kernel.
np.random.seed(42)

## 1. Define Business Parameters

In [None]:
# Business parameters: every tunable input of the analysis lives in this one
# mapping so that later cells never hard-code a rate or a cost.
PARAMS = dict(
    # --- Transaction volume ---
    daily_transactions=100_000,
    fraud_rate=0.0013,             # 0.13% from PaySim data
    avg_fraud_amount=25_000,       # Average fraudulent transaction amount

    # --- Model performance ---
    model_auc=0.9989,
    detection_rate=0.95,           # True positive rate at operating threshold
    false_positive_rate=0.01,

    # --- Adversarial risk (before robustness testing) ---
    evasion_rate_before=0.20,      # 20% of attacks succeed
    attacker_sophistication=0.05,  # 5% of fraudsters use adversarial techniques

    # --- After robustness improvements ---
    evasion_rate_after=0.05,       # Reduced to 5% after improvements

    # --- Costs ---
    cost_per_fraud=25_000,         # Direct loss per undetected fraud
    cost_per_fp=50,                # Customer friction cost per false positive
    red_team_cost=50_000,          # Annual cost of red team program
    remediation_cost=100_000,      # One-time cost to improve model
)

# Echo the configuration so the rendered notebook records the inputs used.
print("Business Parameters:")
for name, value in PARAMS.items():
    # Integers get thousands separators; everything else is shown as-is.
    rendered = f"{value:,}" if isinstance(value, int) else f"{value}"
    print(f"  {name}: {rendered}")

## 2. Monte Carlo Simulation Setup

In [None]:
def simulate_annual_losses(params, evasion_rate, n_simulations=10000):
    """
    Monte Carlo simulation of annual fraud losses.

    Each iteration simulates one year: 365 daily Poisson draws of fraud
    attempts, a split into regular and adversarial attacks, binomial
    detection outcomes, and log-normally distributed amounts for every fraud
    that slips through. A deterministic false-positive cost is added on top.

    Parameters
    ----------
    params : dict
        Must provide 'daily_transactions', 'fraud_rate',
        'attacker_sophistication', 'detection_rate', 'avg_fraud_amount',
        'false_positive_rate' and 'cost_per_fp'.
    evasion_rate : float
        Probability that an adversarial attack evades the model outright.
        Adversarial attacks that do not evade are still subject to the
        model's ordinary miss rate (1 - detection_rate).
    n_simulations : int, default 10000
        Number of simulated years.

    Returns
    -------
    pandas.DataFrame
        One row per simulated year with columns 'fraud_loss', 'fp_cost',
        'total_loss', 'frauds_missed' and 'adversarial_evaded'. The
        'adversarial_evaded' column holds ALL undetected adversarial frauds
        (evasions plus ordinary misses), not evasions alone. The columns are
        present even when n_simulations is 0.

    Notes
    -----
    Draws from NumPy's global RNG (np.random.*); seed it beforehand for
    reproducible output.
    """
    columns = ['fraud_loss', 'fp_cost', 'total_loss', 'frauds_missed', 'adversarial_evaded']

    # --- Loop-invariant quantities, computed once ---------------------------
    daily_fraud_mean = params['daily_transactions'] * params['fraud_rate']
    miss_rate = 1 - params['detection_rate']

    # np.random.lognormal takes the mean/sigma of the UNDERLYING normal, and a
    # log-normal's mean is exp(mu + sigma^2 / 2). Passing log(avg) as mu would
    # make avg the median and inflate the mean by exp(sigma^2 / 2) (~13% for
    # sigma = 0.5). Shift mu so that the mean equals avg_fraud_amount.
    amount_sigma = 0.5
    amount_mu = np.log(params['avg_fraud_amount']) - amount_sigma ** 2 / 2

    # False positive costs are deterministic: they depend only on params.
    legitimate_txns = params['daily_transactions'] * 365 * (1 - params['fraud_rate'])
    false_positives = int(legitimate_txns * params['false_positive_rate'])
    fp_cost = false_positives * params['cost_per_fp']

    results = []

    for _ in range(n_simulations):
        # Daily fraud attempts (Poisson distributed), summed over one year
        daily_frauds = np.random.poisson(daily_fraud_mean, 365)
        annual_fraud_attempts = daily_frauds.sum()

        # Split into regular and adversarial attacks (fraction is truncated)
        adversarial_attacks = int(annual_fraud_attempts * params['attacker_sophistication'])
        regular_attacks = annual_fraud_attempts - adversarial_attacks

        # Regular attacks: missed at the model's ordinary miss rate
        regular_missed = np.random.binomial(regular_attacks, miss_rate)

        # Adversarial attacks: some evade outright; the rest face the
        # ordinary miss rate like any other attack
        adversarial_evaded = np.random.binomial(adversarial_attacks, evasion_rate)
        adversarial_detected = adversarial_attacks - adversarial_evaded
        adversarial_missed = adversarial_evaded + np.random.binomial(
            adversarial_detected, miss_rate
        )

        # Total undetected fraud
        total_missed = regular_missed + adversarial_missed

        # Losses: one log-normal amount per undetected fraud
        fraud_amounts = np.random.lognormal(amount_mu, amount_sigma, total_missed)
        fraud_loss = fraud_amounts.sum()

        results.append({
            'fraud_loss': fraud_loss,
            'fp_cost': fp_cost,
            'total_loss': fraud_loss + fp_cost,
            'frauds_missed': total_missed,
            # Key name kept for downstream cells; value is all missed adversarial frauds
            'adversarial_evaded': adversarial_missed
        })

    # Explicit columns keep the schema stable even for zero simulations
    return pd.DataFrame(results, columns=columns)

## 3. Run Simulations: Before vs After Robustness Improvements

In [None]:
# One constant drives both the banner text and the actual simulation runs, so
# the printed iteration count cannot drift from what was really executed.
N_SIMULATIONS = 10_000
print(f"Running Monte Carlo simulations ({N_SIMULATIONS:,} iterations each)...")

# Simulate before improvements (baseline evasion rate)
results_before = simulate_annual_losses(
    PARAMS, PARAMS['evasion_rate_before'], n_simulations=N_SIMULATIONS
)
print(f"\nBefore robustness improvements (evasion rate: {PARAMS['evasion_rate_before']:.0%})")

# Simulate after improvements (reduced evasion rate, everything else equal)
results_after = simulate_annual_losses(
    PARAMS, PARAMS['evasion_rate_after'], n_simulations=N_SIMULATIONS
)
print(f"After robustness improvements (evasion rate: {PARAMS['evasion_rate_after']:.0%})")

In [None]:
def summarize_results(df, label):
    """Print a headline report for one simulated scenario.

    Reads the 'fraud_loss', 'frauds_missed', 'adversarial_evaded' and
    'total_loss' columns of ``df`` and writes to stdout: a banner carrying
    ``label``, the mean / median / std / 95th / 99th percentile of annual
    fraud loss, the mean count of missed frauds (overall and adversarial),
    and the mean total annual cost. Returns None.
    """
    rule = '=' * 60

    def report_lines():
        # Yielded lazily so each statistic is computed right before its line
        # is printed, exactly like a sequence of print() calls.
        yield f"\n{rule}"
        yield f"{label}"
        yield rule

        losses = df['fraud_loss']
        yield "\nAnnual Fraud Losses:"
        yield f"  Mean:   ${losses.mean():,.0f}"
        yield f"  Median: ${losses.median():,.0f}"
        yield f"  Std:    ${losses.std():,.0f}"
        yield f"  95th:   ${losses.quantile(0.95):,.0f}"
        yield f"  99th:   ${losses.quantile(0.99):,.0f}"

        yield "\nFrauds Missed per Year:"
        yield f"  Mean: {df['frauds_missed'].mean():,.0f}"
        yield f"  From adversarial attacks: {df['adversarial_evaded'].mean():,.0f}"

        yield "\nTotal Annual Cost (fraud + FP):"
        yield f"  Mean: ${df['total_loss'].mean():,.0f}"

    for line in report_lines():
        print(line)

# Print the same report for both scenarios so the figures can be compared line by line.
summarize_results(results_before, "BEFORE Robustness Improvements")
summarize_results(results_after, "AFTER Robustness Improvements")

## 4. Calculate ROI of Red Team Program

In [None]:
# Calculate savings: difference between the mean simulated total annual cost
# of the two scenarios. The false-positive cost is the same constant in both,
# so this is equal to the reduction in mean fraud loss.
annual_savings = results_before['total_loss'].mean() - results_after['total_loss'].mean()
total_investment = PARAMS['red_team_cost'] + PARAMS['remediation_cost']

# First-year ROI: net gain over the first-year outlay (one year of red team
# plus the one-time remediation), as a percentage.
roi = (annual_savings - total_investment) / total_investment * 100

# Payback is only meaningful when the program actually saves money; without
# the guard, zero savings raises ZeroDivisionError and negative savings yield
# a nonsensical negative payback period.
if annual_savings > 0:
    payback_months = total_investment / (annual_savings / 12)
else:
    payback_months = float('inf')

print("\n" + "="*60)
print("ROI ANALYSIS")
print("="*60)
print(f"\nAnnual Loss Reduction: ${annual_savings:,.0f}")
print(f"\nInvestment:")
print(f"  Red Team Program: ${PARAMS['red_team_cost']:,}")
print(f"  Model Remediation: ${PARAMS['remediation_cost']:,}")
print(f"  Total: ${total_investment:,}")
print(f"\nFirst Year ROI: {roi:.1f}%")
print(f"Payback Period: {payback_months:.1f} months")

# 5-year NPV calculation (10% discount rate)
# Cash-flow model: the one-time remediation is paid up front (t = 0); the red
# team program is an annual cost, so it is netted against each year's savings
# in years 1..5. Using total_investment as the t = 0 outlay here would charge
# the red team cost six times over a five-year horizon.
discount_rate = 0.10
years = 5
npv = -PARAMS['remediation_cost'] + sum(
    (annual_savings - PARAMS['red_team_cost']) / (1 + discount_rate)**year
    for year in range(1, years + 1)
)
print(f"\n5-Year NPV (10% discount): ${npv:,.0f}")

## 5. Visualizations

In [None]:
# Four-panel overview of the before/after simulations. All monetary axes are
# in millions of dollars (values are divided by 1e6 before plotting).
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# 1. Distribution of annual losses
# Overlaid histograms of simulated annual fraud loss, with dashed vertical
# lines at each scenario's mean. Each hist() call picks its own 50 bins.
ax1 = axes[0, 0]
ax1.hist(results_before['fraud_loss'] / 1e6, bins=50, alpha=0.6, label='Before', color='red')
ax1.hist(results_after['fraud_loss'] / 1e6, bins=50, alpha=0.6, label='After', color='green')
ax1.axvline(results_before['fraud_loss'].mean() / 1e6, color='red', linestyle='--', label='Mean (Before)')
ax1.axvline(results_after['fraud_loss'].mean() / 1e6, color='green', linestyle='--', label='Mean (After)')
ax1.set_xlabel('Annual Fraud Loss ($ millions)')
ax1.set_ylabel('Frequency')
ax1.set_title('Distribution of Annual Fraud Losses')
ax1.legend()

# 2. Box plot comparison
# The two series are aligned on their row index when combined into one frame.
ax2 = axes[0, 1]
comparison_df = pd.DataFrame({
    'Before': results_before['fraud_loss'] / 1e6,
    'After': results_after['fraud_loss'] / 1e6
})
comparison_df.boxplot(ax=ax2)
ax2.set_ylabel('Annual Fraud Loss ($ millions)')
ax2.set_title('Fraud Loss Comparison')

# 3. Cumulative distribution
# Empirical CDF: sorted losses against evenly spaced probabilities. The same
# probability grid `p` (sized from the 'before' sample) is reused for both
# curves, so both result frames must contain the same number of rows.
ax3 = axes[1, 0]
sorted_before = np.sort(results_before['fraud_loss'])
sorted_after = np.sort(results_after['fraud_loss'])
p = np.linspace(0, 1, len(sorted_before))
ax3.plot(sorted_before / 1e6, p, label='Before', color='red')
ax3.plot(sorted_after / 1e6, p, label='After', color='green')
# Dotted guides at the 95th and 99th percentiles
ax3.axhline(0.95, color='gray', linestyle=':', alpha=0.5)
ax3.axhline(0.99, color='gray', linestyle=':', alpha=0.5)
ax3.set_xlabel('Annual Fraud Loss ($ millions)')
ax3.set_ylabel('Cumulative Probability')
ax3.set_title('Loss Distribution (CDF)')
ax3.legend()

# 4. Annual financial impact
# Drawn as four independent bars, not a cumulative waterfall. Savings are
# plotted as a negative bar. The 'Loss Before' bar is the mean fraud loss
# only, while annual_savings and total_investment come from the ROI cell.
ax4 = axes[1, 1]
categories = ['Loss\nBefore', 'Savings', 'Investment', 'Net\nBenefit']
values = [
    results_before['fraud_loss'].mean() / 1e6,
    -annual_savings / 1e6,
    total_investment / 1e6,
    (annual_savings - total_investment) / 1e6
]
colors = ['red', 'green', 'orange', 'blue']
ax4.bar(categories, values, color=colors)
ax4.axhline(0, color='black', linewidth=0.5)
ax4.set_ylabel('$ millions')
ax4.set_title('Annual Financial Impact')

plt.tight_layout()
# The path is relative to the kernel's working directory.
# NOTE(review): nothing in this notebook creates ../outputs — confirm it exists before running.
plt.savefig('../outputs/cost_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

print("\nVisualization saved to outputs/cost_analysis.png")

## 6. Sensitivity Analysis

In [None]:
# Sweep the evasion rate that the improvements might achieve and measure the
# resulting savings against the baseline ("before") scenario. Each point is a
# smaller 1,000-iteration simulation.
evasion_rates = [0.25, 0.20, 0.15, 0.10, 0.05, 0.02]
savings_by_rate = []

print("Sensitivity Analysis: Savings vs Evasion Rate")
print("-" * 50)

baseline_loss = results_before['total_loss'].mean()

for rate in evasion_rates:
    scenario = simulate_annual_losses(PARAMS, rate, n_simulations=1000)
    savings = baseline_loss - scenario['total_loss'].mean()
    savings_by_rate.append(savings)
    print(f"  Evasion rate {rate:5.0%}: Annual savings ${savings:>12,.0f}")

# Plot sensitivity: x in percent, y in millions of dollars, with the total
# investment drawn as a dashed break-even reference line.
rates_pct = np.asarray(evasion_rates) * 100
savings_musd = np.asarray(savings_by_rate) / 1e6

plt.figure(figsize=(10, 5))
plt.plot(rates_pct, savings_musd, 'bo-', linewidth=2, markersize=8)
plt.axhline(
    total_investment / 1e6,
    color='red',
    linestyle='--',
    label=f'Investment (${total_investment/1e6:.1f}M)',
)
plt.xlabel('Evasion Rate After Improvements (%)')
plt.ylabel('Annual Savings ($ millions)')
plt.title('Sensitivity: Savings vs Achieved Evasion Rate')
plt.legend()
plt.grid(True, alpha=0.3)
plt.gca().invert_xaxis()  # Lower evasion rate = better
plt.tight_layout()
plt.savefig('../outputs/sensitivity_analysis.png', dpi=150)
plt.show()

## 7. Executive Summary

In [None]:
# Executive summary, rendered as a fixed-width box.
# Every row is padded programmatically so the right-hand border always lines
# up, the box is emitted with a single print (no stray blank line splitting
# it), and the business-context figures are read from PARAMS so the summary
# cannot go stale when a parameter changes.
BOX_WIDTH = 78  # number of characters between the left and right borders


def _box_row(text=""):
    """Return one bordered row: `text` indented two spaces, padded to BOX_WIDTH."""
    return "║" + f"  {text}".ljust(BOX_WIDTH) + "║"


def _finding(label, value):
    """Return a bullet with the label left-aligned and the value right-aligned."""
    return f"• {label:<36} {value:>16}"


_rule = "═" * BOX_WIDTH

summary_sections = [
    ("BUSINESS CONTEXT", [
        f"• Daily transaction volume: {PARAMS['daily_transactions']:,} transactions",
        f"• Baseline fraud rate: {PARAMS['fraud_rate']:.2%}",
        f"• Average fraud transaction: ${PARAMS['avg_fraud_amount']:,}",
        f"• Adversarial attacker prevalence: {PARAMS['attacker_sophistication']:.0%} of fraudsters",
    ]),
    ("KEY FINDINGS", [
        _finding("Expected annual fraud loss (before):", f"${results_before['fraud_loss'].mean():,.0f}"),
        _finding("Expected annual fraud loss (after):", f"${results_after['fraud_loss'].mean():,.0f}"),
        _finding("Annual loss reduction:", f"${annual_savings:,.0f}"),
        _finding("Investment required:", f"${total_investment:,}"),
        _finding("First-year ROI:", f"{roi:.1f}%"),
        _finding("Payback period:", f"{payback_months:.1f} months"),
        _finding("5-year NPV:", f"${npv:,.0f}"),
    ]),
    # Static text: it is not derived from the computed payback period, so
    # re-read it if the parameters change materially.
    ("RECOMMENDATION", [
        "Implement adversarial red team testing program. The investment pays for",
        "itself within the first year and provides ongoing protection against",
        "sophisticated fraud attacks.",
    ]),
]

summary_lines = [
    "",  # leading blank line before the box
    f"╔{_rule}╗",
    "║" + "EXECUTIVE SUMMARY".center(BOX_WIDTH) + "║",
]
for title, body in summary_sections:
    summary_lines.append(f"╠{_rule}╣")
    summary_lines.append(_box_row())
    summary_lines.append(_box_row(title))
    summary_lines.extend(_box_row(line) for line in body)
    summary_lines.append(_box_row())
summary_lines.append(f"╚{_rule}╝")
summary_lines.append("")  # trailing blank line after the box

print("\n".join(summary_lines))

---

## Appendix: Assumptions & Limitations

**Assumptions:**
- Daily fraud attempts follow a Poisson distribution, drawn independently for each day
- Fraud amounts follow a log-normal distribution
- Adversarial attackers represent 5% of total fraudsters
- Model performance remains stable over the analysis period

**Limitations:**
- Does not account for regulatory fines or reputational damage
- Assumes constant attacker sophistication over time
- Does not model adaptive attackers who learn from failures

**Data Sources:**
- PaySim synthetic dataset for fraud rate baseline
- Red team campaign results for evasion rate estimates