# Robustness Checks

Tests how stable the baseline ATE estimate is across data subsets, confounder specifications, and repeated random subsamples.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.models.doubly_robust import doubly_robust_estimation
import pickle

# Preprocessed dataset that every robustness check below is run against.
df = pd.read_csv('../data/processed/preprocessed_ad_data.csv')

# Confounder names are read from the 'confounder' column of a separate CSV.
confounder_table = pd.read_csv('../data/processed/confounders.csv')
confounders = confounder_table['confounder'].tolist()

# Pickled baseline results; the 'ate' entry is the reference value that the
# later checks compare against.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open('../results/doubly_robust_results.pkl', 'rb') as results_file:
    baseline_results = pickle.load(results_file)

baseline_ate = baseline_results['ate']
print(f"Baseline ATE: {baseline_ate:.4f}")

In [None]:
# Test 1: Subset Robustness
# Re-estimates the ATE on several subsets of df and reports how far each
# estimate moves from the baseline, as a percentage of the baseline magnitude.
print("=== SUBSET ROBUSTNESS ===")

subsets = {
    'random_80pct': df.sample(frac=0.8, random_state=42),
    'random_70pct': df.sample(frac=0.7, random_state=123),
    'high_engagement': df[df['website_visits'] > df['website_visits'].median()],
    'older_users': df[df['age'] > df['age'].median()]
}

# Scale by |baseline| so the percentage deviation stays non-negative even when
# the baseline ATE is negative; otherwise every deviation would be negative
# and trivially pass the "< 10" threshold below.
baseline_scale = abs(baseline_ate)

subset_results = {}

for name, subset_df in subsets.items():
    try:
        result = doubly_robust_estimation(subset_df, confounders)
        ate = result['ate']
    except Exception as e:
        # Best-effort: keep testing the remaining subsets, but report why
        # this one failed instead of hiding the error.
        print(f"{name}: Failed ({type(e).__name__}: {e})")
        continue

    if baseline_scale:
        deviation = abs(ate - baseline_ate) / baseline_scale * 100
    else:
        # Relative deviation is undefined for a zero baseline.
        deviation = float('inf')

    subset_results[name] = {
        'ate': ate,
        'deviation_pct': deviation
    }
    print(f"{name}: ATE = {ate:.4f} (deviation: {deviation:.1f}%)")

if subset_results:
    avg_deviation = np.mean([r['deviation_pct'] for r in subset_results.values()])
    print(f"\nAverage deviation: {avg_deviation:.1f}%")
    print(f"Robustness: {'✅ Strong' if avg_deviation < 10 else '⚠️ Moderate' if avg_deviation < 20 else '❌ Weak'}")
else:
    # np.mean([]) would return NaN with a warning and be labelled "Weak";
    # say explicitly that nothing could be assessed instead.
    avg_deviation = float('nan')
    print("\nNo subset estimate succeeded; robustness could not be assessed")

In [None]:
# Test 2: Confounder Sensitivity
# Re-estimates the ATE on the full data with progressively smaller confounder
# sets and reports each estimate's percentage deviation from the baseline.
print("\n=== CONFOUNDER SENSITIVITY ===")

# Test different confounder specifications
# The "top_*" specs take a prefix of the list, so they assume `confounders`
# is ordered by importance -- TODO confirm against how confounders.csv is built.
confounder_specs = {
    'all_confounders': confounders,
    'top_80pct': confounders[:int(len(confounders) * 0.8)],
    'top_60pct': confounders[:int(len(confounders) * 0.6)],
    'top_10': confounders[:10],
    'core_demographics': [c for c in ['age', 'income', 'website_visits', 'past_purchases'] if c in confounders]
}

# Scale by |baseline| so the percentage deviation stays non-negative even when
# the baseline ATE is negative.
baseline_scale = abs(baseline_ate)

confounder_results = {}

for spec_name, confounder_set in confounder_specs.items():
    try:
        result = doubly_robust_estimation(df, confounder_set)
        ate = result['ate']
    except Exception as e:
        # Best-effort: continue with the other specifications, but report
        # the reason for the failure instead of hiding it.
        print(f"{spec_name}: Failed ({type(e).__name__}: {e})")
        continue

    if baseline_scale:
        deviation = abs(ate - baseline_ate) / baseline_scale * 100
    else:
        # Relative deviation is undefined for a zero baseline.
        deviation = float('inf')

    confounder_results[spec_name] = {
        'ate': ate,
        'deviation_pct': deviation
    }
    print(f"{spec_name}: ATE = {ate:.4f} (n_conf: {len(confounder_set)}, dev: {deviation:.1f}%)")

if confounder_results:
    avg_conf_deviation = np.mean([r['deviation_pct'] for r in confounder_results.values()])
    print(f"\nAverage deviation: {avg_conf_deviation:.1f}%")
    print(f"Sensitivity: {'✅ Low' if avg_conf_deviation < 8 else '⚠️ Moderate' if avg_conf_deviation < 15 else '❌ High'}")
else:
    # Avoid np.mean([]) (NaN plus a warning) being reported as "High".
    avg_conf_deviation = float('nan')
    print("\nNo confounder specification succeeded; sensitivity could not be assessed")

In [None]:
# Test 3: Cross-Validation
# Re-estimates the ATE on five random 75% subsamples of df (seeds 42..46) and
# summarises the spread of the estimates. Note these are repeated random
# subsamples, not disjoint k-fold splits.
print("\n=== CROSS-VALIDATION ROBUSTNESS ===")

cv_results = []
for i in range(5):
    try:
        split_df = df.sample(frac=0.75, random_state=42+i)
        result = doubly_robust_estimation(split_df, confounders)
        ate = result['ate']
    except Exception as e:
        # Catch Exception rather than using a bare except, so Ctrl-C
        # (KeyboardInterrupt) still stops the loop; report the cause.
        print(f"Split {i+1}: Failed ({type(e).__name__}: {e})")
        continue

    cv_results.append(ate)
    print(f"Split {i+1}: ATE = {ate:.4f}")

if cv_results:
    cv_mean = np.mean(cv_results)
    cv_std = np.std(cv_results)  # population std (ddof=0) over the successful splits
    # Coefficient of variation is undefined for a zero mean; treat as infinite.
    cv_coef = cv_std / abs(cv_mean) if cv_mean != 0 else float('inf')

    print(f"\nCV Mean: {cv_mean:.4f} ± {cv_std:.4f}")
    print(f"Coefficient of Variation: {cv_coef:.3f}")
    print(f"Consistency: {'✅ High' if cv_coef < 0.1 else '⚠️ Moderate' if cv_coef < 0.2 else '❌ Low'}")
else:
    print("\nNo split succeeded; consistency could not be assessed")

# Two side-by-side panels: per-subset deviation bars (left) and the
# per-split ATE estimates against the baseline (right).
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
subset_ax, cv_ax = axes[0], axes[1]

# Left panel: drawn only if at least one subset estimate is available.
if subset_results:
    names = list(subset_results)
    deviations = [entry['deviation_pct'] for entry in subset_results.values()]
    bar_positions = range(len(names))
    # Underscores become line breaks so long subset names wrap under the bars.
    tick_labels = [label.replace('_', '\n') for label in names]

    subset_ax.bar(bar_positions, deviations, alpha=0.7, color='skyblue')
    subset_ax.set_xticks(bar_positions)
    subset_ax.set_xticklabels(tick_labels, rotation=45)
    subset_ax.set_ylabel('Deviation from Baseline (%)')
    subset_ax.set_title('Subset Robustness')

# Right panel: drawn only if at least one split produced an estimate.
if cv_results:
    split_numbers = range(1, len(cv_results) + 1)
    baseline_label = f'Baseline = {baseline_ate:.4f}'

    cv_ax.plot(split_numbers, cv_results, 'o-', linewidth=2, markersize=8)
    cv_ax.axhline(baseline_ate, color='red', linestyle='--', label=baseline_label)
    cv_ax.set_xlabel('CV Split')
    cv_ax.set_ylabel('ATE Estimate')
    cv_ax.set_title('Cross-Validation Results')
    cv_ax.legend()
    cv_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\n✅ Robustness checks completed")