# 02_analysis_minimal.ipynb: Ultra-Simple Bounded Estimation

## Context
- **Ad-seers**: Users shown advertisements
- **Secret shoppers**: Organic buyers without ad exposure (NOT a control group)

## Mathematical Framework

### Observable Quantities
- $N_1$ = Number of ad-seers
- $n_{11}$ = Ad-seers who purchased in Period 2
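- $n_{01}$ = Secret shoppers who purchased (organic buyers)
- $N_0$ = Number of secret shoppers observed (used only for the biased reference rate $n_{01} / N_0$)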

### Calculated Metrics
- $p_1 = n_{11} / N_1$ = Purchase rate among ad-seers
- $c = n_{01} / N_1$ = Contamination ratio
- $\pi \in [\pi_L, \pi_U]$ = Treatment probability, assumed to lie within analyst-chosen bounds (an input assumption, not computed from the data)

### Bounded ATE Formula
$$\text{ATE} \in \left[ p_1 - \frac{\pi_U}{1-\pi_U} c, \; p_1 - \frac{\pi_L}{1-\pi_L} c \right]$$

In [None]:
# pandas reads the counts CSV; numpy is imported alongside it for numeric work.
import pandas as pd
import numpy as np

# Visible confirmation that the import cell ran.
print("Libraries loaded")

## Step 1: Load Data

In [None]:
# Read the aggregated counts table and echo it for a quick visual check.
df = pd.read_csv('causal_estimation_counts_minimal.csv')
print("Data loaded:")
print(df)

# Only the first row is used: pull each count column's first value into
# its own scalar in one pass.
N_1, n_11, N_0, n_01 = (
    df[column].values[0] for column in ('n_1', 'n_11', 'n_0', 'n_01')
)

# Echo the extracted counts with thousands separators.
print(f"\nExtracted values:")
print(f"  N_1 (ad-seers): {N_1:,}")
print(f"  n_11 (ad-seers who purchased): {n_11:,}")
print(f"  N_0 (secret shoppers observed): {N_0:,}")
print(f"  n_01 (secret shoppers who purchased): {n_01:,}")

## Step 2: Calculate Key Metrics

In [None]:
# Both headline metrics share the denominator N_1; fall back to 0 when
# there are no ad-seers so the division is never attempted.
if N_1 > 0:
    p_1 = n_11 / N_1  # purchase rate among ad-seers
    c = n_01 / N_1    # contamination ratio
else:
    p_1 = 0
    c = 0

# Naive comparison against the secret-shopper rate, kept for reference only
# because it is biased.
if N_0 > 0:
    p_0_observed = n_01 / N_0
else:
    p_0_observed = 0
observed_diff = p_1 - p_0_observed

print("KEY METRICS:")
print("=" * 60)
print(f"p_1 (ad-seers purchase rate): {p_1:.4f}")
print(f"c (contamination ratio): {c:.4f}")
print(f"\nFor reference (NOT valid for causal inference):")
print(f"p_0_observed (secret shopper rate): {p_0_observed:.4f}")
print(f"Observed difference: {observed_diff:.4f}")
print("\n⚠️ Secret shoppers are a selected subset, not a control group!")

## Step 3: Bounded Estimation

In [None]:
def calculate_ate_bounds(p_1, c, pi_lower, pi_upper):
    """Return ATE bounds and a point estimate for a bounded treatment probability.

    Every returned value has the form ``p_1 - pi / (1 - pi) * c``. The
    correction term grows with ``pi``, so the *upper* treatment probability
    yields the *lower* ATE bound and vice versa.

    Args:
        p_1: Purchase rate among ad-seers.
        c: Contamination ratio.
        pi_lower: Lower bound on the treatment probability.
        pi_upper: Upper bound on the treatment probability.

    Returns:
        A ``(lower_bound, upper_bound, point_estimate)`` tuple. The point
        estimate is evaluated at the midpoint of ``[pi_lower, pi_upper]``,
        which is generally not the midpoint of the two ATE bounds.

    Raises:
        ValueError: If the bounds do not satisfy
            ``0 <= pi_lower <= pi_upper < 1``. ``pi == 1`` would divide by
            zero, and swapped bounds would silently invert the interval.
    """
    if not 0 <= pi_lower <= pi_upper < 1:
        raise ValueError(
            "treatment probability bounds must satisfy "
            f"0 <= pi_lower <= pi_upper < 1, got [{pi_lower}, {pi_upper}]"
        )

    def _odds(pi):
        # Odds pi / (1 - pi): the multiplier applied to the contamination ratio.
        return pi / (1 - pi)

    lower_bound = p_1 - _odds(pi_upper) * c
    upper_bound = p_1 - _odds(pi_lower) * c

    pi_mid = (pi_lower + pi_upper) / 2
    point_estimate = p_1 - _odds(pi_mid) * c

    return lower_bound, upper_bound, point_estimate

# Assumed range for the treatment probability π.
PI_LOWER, PI_UPPER = 0.95, 0.99

# Raw (unconstrained) bounds and point estimate.
lower, upper, point = calculate_ate_bounds(p_1, c, PI_LOWER, PI_UPPER)

# Monotonicity constraint (ATE >= 0): floor each estimate at zero.
lower_mono, upper_mono, point_mono = (
    max(0, estimate) for estimate in (lower, upper, point)
)

print(f"BOUNDED ESTIMATION (π ∈ [{PI_LOWER}, {PI_UPPER}])")
print("=" * 60)
print(f"\nWithout monotonicity:")
print(f"  ATE ∈ [{lower:.4f}, {upper:.4f}]")
print(f"  Point estimate: {point:.4f}")
print(f"\nWith monotonicity (ATE ≥ 0):")
print(f"  ATE ∈ [{lower_mono:.4f}, {upper_mono:.4f}]")
print(f"  Point estimate: {point_mono:.4f}")

# Relative lift is only defined when the ad-seer purchase rate is positive.
if p_1 > 0:
    lift = 100 * point_mono / p_1
    print(f"\nPercentage lift (relative to ad-seers baseline):")
    print(f"  {lift:.2f}%")

## Step 4: Sensitivity Analysis

In [None]:
# Each scenario is (π lower bound, π upper bound, label). The intervals move
# closer to 1 from "Conservative" to "Extreme".
scenarios = [
    (0.90, 0.95, 'Conservative'),
    (0.95, 0.99, 'Baseline'),
    (0.98, 0.995, 'Aggressive'),
    (0.99, 0.999, 'Extreme')
]

print("SENSITIVITY ANALYSIS")
print("=" * 80)
print(f"{'Scenario':<15} {'π Range':<15} {'ATE Lower':<12} {'ATE Upper':<12} {'Point Est':<12} {'Width':<10}")
print("-" * 80)

for pi_low, pi_high, scenario in scenarios:
    ate_low, ate_high, ate_point = calculate_ate_bounds(p_1, c, pi_low, pi_high)

    # Monotonicity constraint (ATE >= 0): floor each estimate at zero.
    low_mono = max(0, ate_low)
    high_mono = max(0, ate_high)
    point_est_mono = max(0, ate_point)

    # Width of the clamped interval; it collapses to 0 when both bounds are floored.
    width = high_mono - low_mono

    pi_str = f"[{pi_low}, {pi_high}]"
    print(f"{scenario:<15} {pi_str:<15} {low_mono:>11.4f} {high_mono:>11.4f} {point_est_mono:>11.4f} {width:>9.4f}")

# The estimate is p_1 - π/(1-π)·c, so raising π can only lower it. Across the
# scenarios above the π/(1-π) range grows (9–19, 19–99, 49–199, 99–999), so the
# unclamped ATE range widens even though the π intervals themselves shrink.
print("\nKey insight: As π approaches 1 the factor π/(1-π) grows rapidly → lower ATE estimates and a wider range (before clamping at 0)")

## Step 5: Generate Report

In [None]:
# Relative lift is undefined when the ad-seer purchase rate is zero. Compute
# it once so the report file and the console summary apply the same guard.
lift_pct = 100 * point_mono / p_1 if p_1 > 0 else None

# Save results to file. The encoding is explicit because the report contains
# "π", which some platform-default encodings cannot represent.
with open('bounded_ate_minimal_results.txt', 'w', encoding='utf-8') as f:
    f.write("MINIMAL BOUNDED CAUSAL EFFECT ESTIMATION\n")
    f.write("=" * 60 + "\n\n")

    f.write("DATA\n")
    f.write("-" * 40 + "\n")
    f.write(f"Ad-seers (N_1): {N_1:,}\n")
    f.write(f"Ad-seers who purchased (n_11): {n_11:,}\n")
    f.write(f"Secret shoppers observed (N_0): {N_0:,}\n")
    f.write(f"Secret shoppers who purchased (n_01): {n_01:,}\n\n")

    f.write("KEY METRICS\n")
    f.write("-" * 40 + "\n")
    f.write(f"p_1 (ad-seers purchase rate): {p_1:.4f}\n")
    f.write(f"c (contamination ratio): {c:.4f}\n")
    f.write(f"Observed difference (biased): {observed_diff:.4f}\n\n")

    f.write(f"BOUNDED ATE (π ∈ [{PI_LOWER}, {PI_UPPER}])\n")
    f.write("-" * 40 + "\n")
    f.write(f"Range: [{lower_mono:.4f}, {upper_mono:.4f}]\n")
    f.write(f"Point estimate: {point_mono:.4f}\n")
    if lift_pct is not None:
        f.write(f"Relative lift: {lift_pct:.2f}%\n")

    f.write("\n" + "=" * 60 + "\n")
    f.write("Note: Secret shoppers are a selected subset, not a control group.\n")
    f.write("The bounded approach acknowledges this fundamental uncertainty.\n")

print("✅ Results saved to bounded_ate_minimal_results.txt")

# Final summary
print("\n" + "=" * 60)
print("CONCLUSION")
print("=" * 60)
print(f"Best estimate: ATE = {point_mono:.4f} [{lower_mono:.4f}, {upper_mono:.4f}]")
if lift_pct is not None:
    print(f"This represents a {lift_pct:.1f}% lift in purchase probability")
    print("from advertising (relative to the ad-seers baseline).")
else:
    # Previously this line divided by p_1 unconditionally and failed when
    # there were no ad-seer purchases to use as a baseline.
    print("Relative lift is undefined because the ad-seers purchase rate is zero.")