# Experiment 15: Adversarial Attack Resolution

This experiment addresses multiple adversarial review attacks:

| Attack | Issue | Fix |
|--------|-------|-----|
| 1 | Copula bias | Add Kendall's tau, Spearman's rho, tail preservation; Chi-square (not yet implemented) |
| 3 | Only 5 seeds | Increase to 20 seeds |
| 4 | Linear SCM trivial | Add nonlinear SCM |
| 7 | Cherry-picked data | Add Cover Type dataset |
| 8 | No privacy metrics | Add DCR, MIA |
| 9 | Single TRTR model | Add XGBoost, LogReg |

In [None]:
!pip install -q numpy pandas scikit-learn scipy matplotlib seaborn xgboost

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
from sklearn.datasets import fetch_covtype
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this presumably also hides convergence/deprecation warnings
# raised by the estimators fitted later — consider a narrower filter.
warnings.filterwarnings('ignore')

# Base seed passed as `random_state` / `seed` to the code below.
SEED = 42
# Seeds NumPy's legacy global generator only; code that builds its own
# `np.random.default_rng(seed)` is not affected by this call.
np.random.seed(SEED)
print("Setup complete.")

## MISATA Synthesizer

In [None]:
class MISATASynthesizer:
    """Gaussian-copula tabular synthesizer with an optional learned target.

    ``fit`` stores every column's observed values, estimates the correlation
    of the rank-based normal scores and keeps its Cholesky factor.
    ``sample`` draws correlated normals, maps them to uniforms and reads each
    non-target column off its empirical quantile function. When
    ``target_col`` names a column of the fitted frame, that column is
    produced by a gradient-boosting model applied to the sampled features.
    """

    def __init__(self, target_col=None, task='classification', random_state=42):
        # Column generated by the supervised model (None disables that step).
        self.target_col = target_col
        # 'classification' selects the classifier; any other value selects
        # the regressor.
        self.task = task
        # Seed for the target model and default seed for `sample`.
        self.random_state = random_state

    def fit(self, df):
        """Learn marginals, copula correlation and, if configured, the target model.

        Returns ``self`` so that calls can be chained.
        """
        self.columns = list(df.columns)
        self.marginals = {
            name: {'values': df[name].values.copy()} for name in self.columns
        }

        # rank -> pseudo-observation in (0, 1) -> standard-normal score
        n_rows = len(df)
        normal_scores = {}
        for name in self.columns:
            pseudo_obs = stats.rankdata(df[name]) / (n_rows + 1)
            normal_scores[name] = stats.norm.ppf(np.clip(pseudo_obs, 0.001, 0.999))

        latent_corr = pd.DataFrame(normal_scores).corr().values
        # Undefined correlations (NaN) are treated as zero dependence.
        latent_corr = np.nan_to_num(latent_corr, nan=0.0)
        np.fill_diagonal(latent_corr, 1.0)

        # Floor the eigenvalues at 1e-6 before factorising so the matrix
        # handed to Cholesky is numerically positive definite.
        spectrum, basis = np.linalg.eigh(latent_corr)
        floored = np.maximum(spectrum, 1e-6)
        self.cholesky = np.linalg.cholesky(basis @ np.diag(floored) @ basis.T)

        has_target = self.target_col and self.target_col in self.columns
        if not has_target:
            return self

        predictors = [name for name in self.columns if name != self.target_col]
        is_classification = self.task == 'classification'
        if is_classification:
            self.target_model = GradientBoostingClassifier(
                n_estimators=50, max_depth=4, random_state=self.random_state
            )
        else:
            from sklearn.ensemble import GradientBoostingRegressor
            self.target_model = GradientBoostingRegressor(
                n_estimators=50, max_depth=4, random_state=self.random_state
            )
        self.target_model.fit(df[predictors], df[self.target_col])
        self.feature_cols = predictors
        # Mean of the target column, reused by `sample` to set the label cutoff.
        self.target_rate = df[self.target_col].mean() if is_classification else None
        return self

    def _empirical_quantiles(self, name, levels):
        """Interpolate the stored values of column *name* at quantile *levels*."""
        ordered = np.sort(self.marginals[name]['values'])
        grid = np.linspace(0, 1, len(ordered))
        return np.interp(levels, grid, ordered)

    def sample(self, n_samples, seed=None):
        """Draw ``n_samples`` synthetic rows; ``seed`` defaults to ``random_state``."""
        rng = np.random.default_rng(self.random_state if seed is None else seed)

        latent = rng.standard_normal((n_samples, len(self.columns)))
        levels = np.clip(stats.norm.cdf(latent @ self.cholesky.T), 0.001, 0.999)

        # The target column is skipped here; it is filled in below.
        synthetic = {
            name: self._empirical_quantiles(name, levels[:, idx])
            for idx, name in enumerate(self.columns)
            if name != self.target_col
        }

        if self.target_col and self.target_col in self.columns:
            features = pd.DataFrame(
                {name: synthetic[name] for name in self.feature_cols}
            )
            if self.task == 'classification':
                scores = self.target_model.predict_proba(features)[:, 1]
                # Cut at the (1 - target_rate) percentile of the scores so the
                # share of rows labelled 1 follows the rate seen in `fit`.
                cutoff = np.percentile(scores, (1 - self.target_rate) * 100)
                synthetic[self.target_col] = (scores >= cutoff).astype(int)
            else:
                synthetic[self.target_col] = self.target_model.predict(features)

        return pd.DataFrame(synthetic)[self.columns]

print("Synthesizer defined.")

---
## Attack 1 Fix: Comprehensive Correlation Metrics

In [None]:
def comprehensive_correlation_metrics(real_df, synth_df):
    """
    Compare the dependence structure of real and synthetic data.

    Rows of the two frames are not paired, so every metric compares
    frame-level summaries (correlation matrices, exceedance rates) and is
    unaffected by row order.

    Parameters:
        real_df: numeric DataFrame of real records.
        synth_df: numeric DataFrame of synthetic records; assumed to have
            the same columns in the same order as ``real_df``.

    Returns a dict with:
        pearson_similarity: correlation between the flattened Pearson
            matrices of the two frames.
        kendall_tau_mean: mean agreement of pairwise Kendall's tau over all
            distinct column pairs, ``1 - |tau_real - tau_synth| / 2``
            (1.0 = identical rank dependence, 0.0 = opposite). NaN when no
            pair has a defined tau in both frames.
        spearman_similarity: as ``pearson_similarity`` but on Spearman
            matrices.
        tail_preservation: 1 minus the mean absolute difference between the
            real and synthetic share of values above the real 95th
            percentile (1.0 when no column qualifies).
    """
    metrics = {}

    # 1. Pearson (original)
    # NOTE(review): the flattened matrices include the diagonal (all 1.0),
    # which pulls this similarity upward.
    real_corr = real_df.corr().values.flatten()
    synth_corr = synth_df.corr().values.flatten()
    mask = ~(np.isnan(real_corr) | np.isnan(synth_corr))
    metrics['pearson_similarity'] = np.corrcoef(real_corr[mask], synth_corr[mask])[0, 1]

    # 2. Kendall's Tau (rank-based, better for ordinal)
    # Tau is computed between column pairs *within* each frame and the two
    # tau matrices are then compared; correlating real rows with synthetic
    # rows directly would be meaningless because the rows are unpaired.
    real_kendall = real_df.corr(method='kendall').values
    synth_kendall = synth_df.corr(method='kendall').values
    upper = np.triu_indices_from(real_kendall, k=1)
    tau_gap = np.abs(real_kendall[upper] - synth_kendall[upper])
    tau_gap = tau_gap[~np.isnan(tau_gap)]
    # |tau_real - tau_synth| lies in [0, 2]; halve it to land in [0, 1].
    metrics['kendall_tau_mean'] = 1 - tau_gap.mean() / 2 if tau_gap.size else np.nan

    # 3. Spearman's Rho (rank correlation)
    real_spearman = real_df.corr(method='spearman').values.flatten()
    synth_spearman = synth_df.corr(method='spearman').values.flatten()
    mask = ~(np.isnan(real_spearman) | np.isnan(synth_spearman))
    metrics['spearman_similarity'] = np.corrcoef(real_spearman[mask], synth_spearman[mask])[0, 1]

    # 4. Tail preservation
    # Compares, per column, the fraction of values above the real 95th
    # percentile. Columns with 5 or fewer exceedances in either frame are
    # skipped.
    tail_corrs = []
    for col in real_df.columns:
        threshold = np.percentile(real_df[col], 95)
        real_tail = real_df[col] > threshold
        synth_tail = synth_df[col] > threshold
        if real_tail.sum() > 5 and synth_tail.sum() > 5:
            tail_corrs.append(np.abs(real_tail.mean() - synth_tail.mean()))
    metrics['tail_preservation'] = 1 - np.mean(tail_corrs) if tail_corrs else 1.0

    return metrics


# Test on Adult Census
print("Loading Adult Census for Attack 1 fix...")
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
columns = ['age', 'workclass', 'fnlwgt', 'education', 'education_num', 'marital_status',
           'occupation', 'relationship', 'race', 'sex', 'capital_gain', 'capital_loss',
           'hours_per_week', 'native_country', 'income']
# The file separates fields with ", ". Because skipinitialspace=True strips
# the blank after each comma, the missing-value marker is expected to reach
# the NA matcher as '?' rather than ' ?'; both spellings are listed so the
# dropna() below actually removes incomplete rows either way.
df_raw = pd.read_csv(url, names=columns, na_values=['?', ' ?'], skipinitialspace=True)
# Drop incomplete rows, then keep a reproducible 5000-row subsample.
df_raw = df_raw.dropna().reset_index(drop=True).sample(5000, random_state=SEED)
# Binary target: 1 for '>50K', 0 otherwise.
df_raw['income'] = (df_raw['income'] == '>50K').astype(int)
# Integer-encode the categorical columns so every column is numeric.
for col in ['workclass', 'education', 'marital_status', 'occupation', 'relationship', 'race', 'sex', 'native_country']:
    df_raw[col] = LabelEncoder().fit_transform(df_raw[col].astype(str))

# Fit on the full subsample and draw the same number of synthetic rows.
synth = MISATASynthesizer(target_col='income', random_state=SEED)
synth.fit(df_raw)
df_synth = synth.sample(len(df_raw))

corr_metrics = comprehensive_correlation_metrics(df_raw, df_synth)

print("\n" + "="*60)
print("ATTACK 1 FIX: Comprehensive Correlation Metrics")
print("="*60)
for metric, value in corr_metrics.items():
    print(f"  {metric}: {value:.4f}")

---
## Attack 3 Fix: 20-Seed Validation

In [None]:
# 20-seed validation: refit and resample the synthesizer under 20 different
# seeds and report the spread of fidelity and TSTR AUC.
n_seeds = 20
seeds = [42 + offset for offset in range(n_seeds)]

fidelity_scores, tstr_scores = [], []

train_df, test_df = train_test_split(df_raw, test_size=0.2, random_state=42)

# The real hold-out set does not depend on the seed, so build it once.
X_test = test_df.drop('income', axis=1)
y_test = test_df['income']

print(f"\nRunning {n_seeds}-seed validation...")
for run_no, seed in enumerate(seeds, start=1):
    synth = MISATASynthesizer(target_col='income', random_state=seed)
    df_synth = synth.fit(train_df).sample(len(train_df), seed=seed)

    # Fidelity: mean over columns of (1 - two-sample KS statistic).
    ks_scores = [
        1 - stats.ks_2samp(train_df[col], df_synth[col]).statistic
        for col in train_df.columns
    ]
    fidelity_scores.append(np.mean(ks_scores))

    # TSTR: train on synthetic rows, score on the real hold-out set.
    X_synth, y_synth = df_synth.drop('income', axis=1), df_synth['income']
    model = RandomForestClassifier(n_estimators=50, random_state=seed, n_jobs=-1)
    model.fit(X_synth, y_synth)
    held_out_scores = model.predict_proba(X_test)[:, 1]
    tstr_scores.append(roc_auc_score(y_test, held_out_scores))

    if run_no % 5 == 0:
        print(f"  Completed {run_no}/{n_seeds} seeds")

# Empirical 2.5th / 97.5th percentiles of the per-seed scores.
fid_lo, fid_hi = np.percentile(fidelity_scores, [2.5, 97.5])
tstr_lo, tstr_hi = np.percentile(tstr_scores, [2.5, 97.5])

rule = "=" * 60
print("\n" + rule)
print("ATTACK 3 FIX: 20-Seed Validation")
print(rule)
print(f"  Fidelity: {np.mean(fidelity_scores):.4f} ± {np.std(fidelity_scores):.4f}")
print(f"  TSTR AUC: {np.mean(tstr_scores):.4f} ± {np.std(tstr_scores):.4f}")
print(f"  95% CI Fidelity: [{fid_lo:.4f}, {fid_hi:.4f}]")
print(f"  95% CI TSTR: [{tstr_lo:.4f}, {tstr_hi:.4f}]")

---
## Attack 4 Fix: Nonlinear SCM Validation

In [None]:
class NonlinearSCM:
    """
    Structural causal model with nonlinear mechanisms.

    Graph and mechanisms:
        X1 -> X2   X2 = exp(0.3 * X1) + noise
        X1 -> X3   X3 = 0.5 * X1**2 + noise
        X1, X2, X3 -> Y   Y = tanh(X2 * X3) + 0.3 * X1 + noise
    """

    @staticmethod
    def _roll_out(rng, n_samples, fixed_var=None, fixed_value=None):
        """Evaluate the equations in causal order, pinning ``fixed_var`` if given.

        A pinned variable is filled with ``fixed_value`` and consumes no
        random draw; every other variable draws its noise in the order
        X1, X2, X3, Y.
        """
        def pinned(name):
            return fixed_var == name

        if pinned('X1'):
            x1 = np.full(n_samples, fixed_value)
        else:
            x1 = rng.normal(0, 1, n_samples)

        if pinned('X2'):
            x2 = np.full(n_samples, fixed_value)
        else:
            x2 = np.exp(0.3 * x1) + rng.normal(0, 0.5, n_samples)

        if pinned('X3'):
            x3 = np.full(n_samples, fixed_value)
        else:
            x3 = 0.5 * x1**2 + rng.normal(0, 0.5, n_samples)

        y = np.tanh(x2 * x3) + 0.3 * x1 + rng.normal(0, 0.3, n_samples)

        return pd.DataFrame({'X1': x1, 'X2': x2, 'X3': x3, 'Y': y})

    @classmethod
    def generate(cls, n_samples, seed=42):
        """Sample ``n_samples`` observational rows (nothing pinned)."""
        return cls._roll_out(np.random.default_rng(seed), n_samples)

    @classmethod
    def intervene(cls, n_samples, intervention_var, value, seed=42):
        """Sample ``n_samples`` rows under do(``intervention_var`` = ``value``).

        Only 'X1', 'X2' and 'X3' can be pinned; any other name leaves the
        model unchanged.
        """
        return cls._roll_out(
            np.random.default_rng(seed), n_samples, intervention_var, value
        )


# Generate data from nonlinear SCM
print("\nGenerating nonlinear SCM data...")
df_nonlinear = NonlinearSCM.generate(5000, seed=SEED)

# Fit MISATA with Y as a regression target
synth_nl = MISATASynthesizer(target_col='Y', task='regression', random_state=SEED)
synth_nl.fit(df_nonlinear)

# Test interventions
intervention_values = [-1, 0, 1, 2]
nl_results = []

df_baseline_true = NonlinearSCM.generate(3000, seed=100)
df_baseline_misata = synth_nl.sample(3000, seed=100)

# Every do(X1=val) estimate conditions on the same seed-100 synthetic draw,
# so that draw is made once and shared with the baseline instead of being
# regenerated for each intervention value.
df_misata_int = df_baseline_misata
baseline_true_mean = df_baseline_true['Y'].mean()
baseline_misata_mean = df_baseline_misata['Y'].mean()

for val in intervention_values:
    # True effect: mean of Y under do(X1=val) minus the observational mean.
    df_true = NonlinearSCM.intervene(3000, 'X1', val, seed=100)
    true_effect = df_true['Y'].mean() - baseline_true_mean

    # MISATA effect: approximate do(X1=val) by keeping synthetic rows whose
    # X1 lies within 0.5 of val. X1 has no parents in this SCM, so
    # conditioning on it is used as a stand-in for intervening on it.
    mask = np.abs(df_misata_int['X1'] - val) < 0.5
    if mask.sum() > 100:
        misata_effect = df_misata_int.loc[mask, 'Y'].mean() - baseline_misata_mean
    else:
        # Too few rows near val for a stable mean; dropped below.
        misata_effect = np.nan

    nl_results.append({
        'intervention': f'do(X1={val})',
        'true_effect': true_effect,
        'misata_effect': misata_effect
    })

nl_df = pd.DataFrame(nl_results)
nl_df = nl_df.dropna()

# A correlation needs at least two intervention points.
if len(nl_df) > 1:
    nl_correlation = np.corrcoef(nl_df['true_effect'], nl_df['misata_effect'])[0, 1]
else:
    nl_correlation = np.nan

print("\n" + "="*60)
print("ATTACK 4 FIX: Nonlinear SCM Validation")
print("="*60)
print(nl_df.to_string(index=False))
print(f"\n  Nonlinear Effect Recovery: r = {nl_correlation:.4f}")

---
## Attack 7 Fix: Cover Type Dataset

In [None]:
# Load Cover Type (54 features)
print("\nLoading Cover Type dataset (54 features)...")
covtype = fetch_covtype()
feature_names = [f'f{idx}' for idx in range(54)]
df_cover = pd.DataFrame(covtype.data, columns=feature_names)
df_cover['target'] = covtype.target

# Keep a reproducible 10,000-row subsample.
df_cover = df_cover.sample(10000, random_state=SEED).reset_index(drop=True)

# Binary task: cover type 1 against every other type.
is_type_one = df_cover['target'] == 1
df_cover['target'] = is_type_one.astype(int)

print(f"  Shape: {df_cover.shape}")
print(f"  Target distribution: {df_cover['target'].mean():.2%} positive")

# Hold out 20% of the real rows for evaluation.
train_cover, test_cover = train_test_split(df_cover, test_size=0.2, random_state=SEED)

# Fit MISATA on the training part and draw an equally sized synthetic set.
synth_cover = MISATASynthesizer(target_col='target', task='classification', random_state=SEED)
synth_cover.fit(train_cover)
df_synth_cover = synth_cover.sample(len(train_cover))

# Fidelity: mean over columns of (1 - two-sample KS statistic).
ks_scores = [
    1 - stats.ks_2samp(train_cover[col], df_synth_cover[col]).statistic
    for col in train_cover.columns
]
cover_fidelity = np.mean(ks_scores)

# Shared real hold-out set for both classifiers.
X_test = test_cover.drop('target', axis=1)
y_test = test_cover['target']

# TSTR: train on synthetic, test on real.
X_synth = df_synth_cover.drop('target', axis=1)
y_synth = df_synth_cover['target']
model = RandomForestClassifier(n_estimators=100, random_state=SEED, n_jobs=-1)
model.fit(X_synth, y_synth)
tstr_scores_cover = model.predict_proba(X_test)[:, 1]
cover_tstr = roc_auc_score(y_test, tstr_scores_cover)

# TRTR baseline: train on real, test on real.
X_train = train_cover.drop('target', axis=1)
y_train = train_cover['target']
model_real = RandomForestClassifier(n_estimators=100, random_state=SEED, n_jobs=-1)
model_real.fit(X_train, y_train)
trtr_scores_cover = model_real.predict_proba(X_test)[:, 1]
cover_trtr = roc_auc_score(y_test, trtr_scores_cover)

rule = "=" * 60
print("\n" + rule)
print("ATTACK 7 FIX: Cover Type Dataset (54 features)")
print(rule)
print(f"  Marginal Fidelity: {cover_fidelity:.4f}")
print(f"  TRTR AUC: {cover_trtr:.4f}")
print(f"  TSTR AUC: {cover_tstr:.4f}")
print(f"  TSTR Ratio: {cover_tstr/cover_trtr:.2%}")

---
## Attack 8 Fix: Privacy Metrics

In [None]:
def compute_privacy_metrics(real_df, synth_df, n_neighbors=5):
    """
    Compute distance- and classifier-based privacy indicators.

    1. DCR (Distance to Closest Record): distance from each synthetic row
       to its nearest real row, after standardising with the real data's
       mean and standard deviation.
    2. NNDR (Nearest Neighbor Distance Ratio): closest / second-closest
       real-row distance per synthetic row.
    3. "MIA": AUC of a random forest trained to tell real rows from
       synthetic rows. NOTE(review): this measures distinguishability, not
       membership of individual training records; a membership test would
       need real rows that were not used for fitting.

    Parameters:
        real_df: DataFrame of real records (needs at least two rows).
        synth_df: DataFrame of synthetic records with the same columns.
        n_neighbors: currently unused; kept so existing callers keep working.

    Returns a dict with keys ``dcr_mean``, ``dcr_std``, ``dcr_min``,
    ``dcr_5th_percentile``, ``nndr_mean``, ``mia_auc`` and ``mia_advantage``.
    """
    metrics = {}

    # Normalize for distance computation (statistics come from real data only)
    scaler = StandardScaler()
    real_scaled = scaler.fit_transform(real_df)
    synth_scaled = scaler.transform(synth_df)

    # One 2-nearest-neighbour query serves both DCR and NNDR: column 0 is the
    # closest real record, column 1 the second closest.
    nn = NearestNeighbors(n_neighbors=2, algorithm='ball_tree')
    nn.fit(real_scaled)
    neighbor_dist, _ = nn.kneighbors(synth_scaled)
    closest = neighbor_dist[:, 0]
    second_closest = neighbor_dist[:, 1]

    # 1. Distance to Closest Record (DCR)
    metrics['dcr_mean'] = np.mean(closest)
    metrics['dcr_std'] = np.std(closest)
    metrics['dcr_min'] = np.min(closest)  # Risk indicator
    metrics['dcr_5th_percentile'] = np.percentile(closest, 5)

    # 2. NNDR (ratio of closest to second-closest)
    # The small constant guards against division by zero.
    nndr = closest / (second_closest + 1e-10)
    metrics['nndr_mean'] = np.mean(nndr)

    # 3. Real-vs-synthetic classifier (reported under the "mia_*" keys)
    n_test = min(1000, len(real_df), len(synth_df))

    # Equal-sized random subsets of both frames
    real_sample = real_df.sample(n_test, random_state=42)
    synth_sample = synth_df.sample(n_test, random_state=42)

    # Label: 1 = real, 0 = synthetic
    X_mia = pd.concat([real_sample, synth_sample], ignore_index=True)
    y_mia = np.array([1] * n_test + [0] * n_test)

    X_train, X_test_mia, y_train, y_test_mia = train_test_split(
        X_mia, y_mia, test_size=0.3, random_state=42
    )

    mia_model = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1)
    mia_model.fit(X_train, y_train)

    mia_auc = roc_auc_score(y_test_mia, mia_model.predict_proba(X_test_mia)[:, 1])
    metrics['mia_auc'] = mia_auc
    # 0 at chance level (AUC 0.5), 1 at perfect separation; negative when the
    # classifier does worse than chance.
    metrics['mia_advantage'] = 2 * (mia_auc - 0.5)

    return metrics


# Compute privacy metrics on Adult dataset (real training split vs. the
# synthetic frame currently bound to `df_synth`)
print("\nComputing privacy metrics...")
privacy_metrics = compute_privacy_metrics(train_df, df_synth)

rule = "=" * 60
print("\n" + rule)
print("ATTACK 8 FIX: Privacy Metrics")
print(rule)
for label, key in (
    ("DCR Mean", 'dcr_mean'),
    ("DCR 5th Percentile", 'dcr_5th_percentile'),
    ("NNDR Mean", 'nndr_mean'),
    ("MIA AUC", 'mia_auc'),
    ("MIA Advantage", 'mia_advantage'),
):
    print(f"  {label}: {privacy_metrics[key]:.4f}")

print(f"\n  Interpretation:")
# Flag the run when the closest 5% of synthetic rows sit within 0.5
# (standardised units) of a real row.
dcr_too_close = privacy_metrics['dcr_5th_percentile'] < 0.5
print(
    f"    ⚠ LOW DCR: Some synthetic records are close to real records"
    if dcr_too_close
    else f"    ✓ Good DCR: Synthetic records are sufficiently distant"
)
# Flag the run when the real-vs-synthetic classifier exceeds AUC 0.7.
easily_separated = privacy_metrics['mia_auc'] > 0.7
print(
    f"    ⚠ HIGH MIA: Synthetic data is distinguishable from real"
    if easily_separated
    else f"    ✓ Good MIA: Synthetic data is hard to distinguish"
)

---
## Attack 9 Fix: Multi-Model TSTR

In [None]:
# Multi-model TSTR evaluation: for each classifier family, compare a model
# trained on real rows (TRTR) with one trained on synthetic rows (TSTR),
# both scored on the same real hold-out set.
print("\nRunning multi-model TSTR evaluation...")

models = {
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=SEED, n_jobs=-1),
    'XGBoost': xgb.XGBClassifier(n_estimators=100, random_state=SEED, use_label_encoder=False, eval_metric='logloss'),
    'LogisticRegression': LogisticRegression(max_iter=500, random_state=SEED),
    'GradientBoosting': GradientBoostingClassifier(n_estimators=100, random_state=SEED)
}

multi_model_results = []

X_train_real, y_train_real = train_df.drop('income', axis=1), train_df['income']
X_synth_mm, y_synth_mm = df_synth.drop('income', axis=1), df_synth['income']
X_test_mm, y_test_mm = test_df.drop('income', axis=1), test_df['income']

for name, model in models.items():
    # Each fit starts from a fresh estimator built from the template's
    # class and parameters.
    estimator_cls = type(model)

    # TRTR
    model_real = estimator_cls(**model.get_params())
    model_real.fit(X_train_real, y_train_real)
    trtr = roc_auc_score(y_test_mm, model_real.predict_proba(X_test_mm)[:, 1])

    # TSTR
    model_synth = estimator_cls(**model.get_params())
    model_synth.fit(X_synth_mm, y_synth_mm)
    tstr = roc_auc_score(y_test_mm, model_synth.predict_proba(X_test_mm)[:, 1])

    ratio = tstr / trtr
    multi_model_results.append({
        'model': name,
        'trtr': trtr,
        'tstr': tstr,
        'ratio': ratio
    })
    print(f"  {name}: TRTR={trtr:.4f}, TSTR={tstr:.4f}, Ratio={ratio:.2%}")

mm_df = pd.DataFrame(multi_model_results)

rule = "=" * 60
print("\n" + rule)
print("ATTACK 9 FIX: Multi-Model TSTR")
print(rule)
print(mm_df.to_string(index=False))
print(f"\n  Overall Mean TSTR Ratio: {mm_df['ratio'].mean():.2%} ± {mm_df['ratio'].std():.2%}")

---
## Save All Results

In [None]:
# Compile all results into a single-row table and write it to CSV.
fidelity_mean, fidelity_std = np.mean(fidelity_scores), np.std(fidelity_scores)
tstr_mean, tstr_std = np.mean(tstr_scores), np.std(tstr_scores)
cover_ratio = cover_tstr / cover_trtr
ratio_mean, ratio_std = mm_df['ratio'].mean(), mm_df['ratio'].std()

# Key order fixes the column order of the CSV.
all_results = {
    # Attack 1
    'pearson_similarity': corr_metrics['pearson_similarity'],
    'kendall_tau': corr_metrics['kendall_tau_mean'],
    'spearman_similarity': corr_metrics['spearman_similarity'],
    'tail_preservation': corr_metrics['tail_preservation'],

    # Attack 3
    'fidelity_20seed_mean': fidelity_mean,
    'fidelity_20seed_std': fidelity_std,
    'tstr_20seed_mean': tstr_mean,
    'tstr_20seed_std': tstr_std,

    # Attack 4
    'nonlinear_scm_correlation': nl_correlation,

    # Attack 7
    'covtype_fidelity': cover_fidelity,
    'covtype_tstr_ratio': cover_ratio,

    # Attack 8
    'dcr_mean': privacy_metrics['dcr_mean'],
    'dcr_5th': privacy_metrics['dcr_5th_percentile'],
    'mia_auc': privacy_metrics['mia_auc'],
    'mia_advantage': privacy_metrics['mia_advantage'],

    # Attack 9
    'multi_model_tstr_mean': ratio_mean,
    'multi_model_tstr_std': ratio_std
}

pd.DataFrame([all_results]).to_csv('adversarial_fixes_results.csv', index=False)

banner = "=" * 70
print("\n" + banner)
print("ADVERSARIAL ATTACK FIXES COMPLETE")
print(banner)
print("\nSummary:")
for summary_line in (
    f"  Attack 1 (Correlation): Kendall τ = {corr_metrics['kendall_tau_mean']:.4f}",
    f"  Attack 3 (Seeds): 20-seed TSTR = {tstr_mean:.4f} ± {tstr_std:.4f}",
    f"  Attack 4 (Nonlinear): Effect recovery r = {nl_correlation:.4f}",
    f"  Attack 7 (Datasets): Cover Type TSTR = {cover_ratio:.2%}",
    f"  Attack 8 (Privacy): MIA advantage = {privacy_metrics['mia_advantage']:.4f}",
    f"  Attack 9 (Models): Multi-model TSTR = {ratio_mean:.2%}",
):
    print(summary_line)
print("\nFile saved: adversarial_fixes_results.csv")