In [10]:
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the pickled dataset bundle (latent features + attribute annotations).
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only open this file if it comes from a trusted source.
with open("dataset_description.pkl", "rb") as f:
    dc = pickle.load(f)

X = dc["latent_vars"]      # feature matrix, one row per sample (printed below)
attrs = dc["attributes"]   # attribute values, one column per entry of `fields`
fields = dc["fields"]      # attribute names; position i names column i of `attrs`

# Quick shape check so a mismatch between features and annotations is visible.
print("X shape:", X.shape)
print("attrs shape:", attrs.shape)
print("Number of attributes:", len(fields))

def bin_attr(attrs, idx):
    """
    Binarize a single attribute column by thresholding at zero.

    Args:
        attrs: 2-D array of attribute values (rows = samples).
        idx: Column index of the attribute to binarize.

    Returns:
        Integer array with one entry per row: 1 where the value is strictly
        positive, 0 everywhere else (zero, negative, or NaN).
    """
    column = attrs[:, idx]
    is_positive = np.greater(column, 0)
    return is_positive.astype(int)

def eval_pair(target_name, protected_name, test_size=0.4, random_state=0):
    """
    Fit a logistic-regression classifier for one target attribute and measure
    how its test accuracy differs between the two protected groups.

    Reads the notebook-level `X`, `attrs` and `fields`.

    Args:
        target_name: Name (in `fields`) of the attribute to predict.
        protected_name: Name (in `fields`) of the attribute defining the groups.
        test_size: Fraction of the data held out of training; the held-out
            part is then split 50/50 into an audit half and a test half.
        random_state: Seed for the first split; the second uses random_state + 1.

    Returns:
        dict with keys "acc", "acc_g0", "acc_g1", "gap", "n_test", "n_g0",
        "n_g1". A group with no test samples gets accuracy NaN, and then
        "gap" is NaN as well.

    Raises:
        ValueError: if either attribute name is missing from `fields`.
    """
    if any(name not in fields for name in (target_name, protected_name)):
        raise ValueError("Attribute name not found in fields list.")

    y = bin_attr(attrs, fields.index(target_name))
    g = bin_attr(attrs, fields.index(protected_name))

    # First split: train vs. held-out (audit + test), stratified on the label.
    X_train, X_tmp, y_train, y_tmp, _g_train, g_tmp = train_test_split(
        X, y, g, test_size=test_size, random_state=random_state, stratify=y
    )

    # Second split: held-out -> audit / test halves. Only the test half is
    # used in this function.
    _X_audit, X_test, _y_audit, y_test, _g_audit, g_test = train_test_split(
        X_tmp, y_tmp, g_tmp, test_size=0.5, random_state=random_state + 1, stratify=y_tmp
    )

    # Baseline classifier, thresholded at probability 0.5.
    clf = LogisticRegression(max_iter=1000, solver="lbfgs")
    clf.fit(X_train, y_train)
    y_pred = (clf.predict_proba(X_test)[:, 1] >= 0.5).astype(int)

    overall_acc = accuracy_score(y_test, y_pred)

    # Accuracy restricted to each protected group (NaN when the group is empty).
    group_acc = {}
    group_n = {}
    for grp in (0, 1):
        in_grp = (g_test == grp)
        count = int(in_grp.sum())
        group_n[grp] = count
        if count == 0:
            group_acc[grp] = np.nan
        else:
            group_acc[grp] = accuracy_score(y_test[in_grp], y_pred[in_grp])

    both_defined = np.isfinite(group_acc[0]) and np.isfinite(group_acc[1])
    gap = abs(group_acc[0] - group_acc[1]) if both_defined else np.nan

    return {
        "acc": overall_acc,
        "acc_g0": group_acc[0],
        "acc_g1": group_acc[1],
        "gap": gap,
        "n_test": len(y_test),
        "n_g0": group_n[0],
        "n_g1": group_n[1],
    }

# Candidate targets and protected attributes we want to scan
candidate_targets = [
    "Smiling",
    "Frowning",
    "Attractive Man",
    "Attractive Woman",
    "Heavy Makeup",
    "Wearing Lipstick",
]

candidate_protected = [
    "Male",
    "White",
    "Black",
    "Asian",
    "Indian",
    "Youth",
    "Senior",
]

# One (target, protected, metrics-dict) tuple per successfully evaluated pair.
results = []

for t_name in candidate_targets:
    if t_name not in fields:
        continue
    for g_name in candidate_protected:
        # Skip unknown attributes and the degenerate "predict X, group by X" case.
        if g_name not in fields or g_name == t_name:
            continue

        # Best-effort scan: a failing pair (e.g. a stratified split that is
        # impossible for a very rare label) is reported and skipped rather
        # than aborting the whole sweep.
        try:
            stats = eval_pair(t_name, g_name)
        except Exception as e:
            print(f"Skipping pair ({t_name}, {g_name}) due to error: {e}")
        else:
            results.append((t_name, g_name, stats))


def _gap_rank(entry):
    """Sort key: largest gap first, pairs with an undefined (NaN) gap last."""
    gap = entry[2]["gap"]
    # eval_pair returns NaN when a protected group has no test samples.
    # NaN compares False against everything, so using it directly as a sort
    # key leaves the ordering undefined; rank it explicitly instead.
    return (1, 0.0) if np.isnan(gap) else (0, -gap)


# Sort by absolute group accuracy gap (largest first, NaN gaps at the end)
results_sorted = sorted(results, key=_gap_rank)

print("\nTop pairs by abs(group accuracy gap):")
for (t_name, g_name, s) in results_sorted:
    print(
        f"Target={t_name:18s} | Protected={g_name:12s} | "
        f"acc={s['acc']:.3f} | acc_g0={s['acc_g0']:.3f} | "
        f"acc_g1={s['acc_g1']:.3f} | gap={s['gap']:.3f} | "
        f"n_g0={s['n_g0']}, n_g1={s['n_g1']}"
    )


X shape: (13143, 100)
attrs shape: (13143, 73)
Number of attributes: 73

Top pairs by abs(group accuracy gap):
Target=Wearing Lipstick   | Protected=Male         | acc=0.874 | acc_g0=0.598 | acc_g1=0.957 | gap=0.359 | n_g0=610, n_g1=2019
Target=Heavy Makeup       | Protected=Male         | acc=0.890 | acc_g0=0.621 | acc_g1=0.971 | gap=0.350 | n_g0=607, n_g1=2022
Target=Attractive Woman   | Protected=Male         | acc=0.867 | acc_g0=0.599 | acc_g1=0.938 | gap=0.339 | n_g0=548, n_g1=2081
Target=Attractive Woman   | Protected=Youth        | acc=0.867 | acc_g0=0.908 | acc_g1=0.647 | gap=0.261 | n_g0=2215, n_g1=414
Target=Attractive Man     | Protected=Senior       | acc=0.711 | acc_g0=0.662 | acc_g1=0.890 | gap=0.228 | n_g0=2073, n_g1=556
Target=Heavy Makeup       | Protected=Youth        | acc=0.890 | acc_g0=0.929 | acc_g1=0.702 | gap=0.227 | n_g0=2182, n_g1=447
Target=Wearing Lipstick   | Protected=Youth        | acc=0.874 | acc_g0=0.904 | acc_g1=0.718 | gap=0.186 | n_g0=2203, n_g1=426


In [8]:
# List every attribute name together with its position in `fields`.
# NOTE(review): `pickle` is not imported in this cell; it relies on an import
# executed by another cell, so the cell fails on a fresh kernel if run first.
# NOTE(review): pickle.load can execute arbitrary code; only use trusted files.
with open("dataset_description.pkl", "rb") as f:
    dc = pickle.load(f)

fields = dc["fields"]
# Total number of attribute names, then one "index name" line per attribute.
print(len(fields))
for i, name in enumerate(fields):
    print(i, name)



73
0 Male
1 Asian
2 White
3 Black
4 Baby
5 Child
6 Youth
7 Middle Aged
8 Senior
9 Black Hair
10 Blond Hair
11 Brown Hair
12 Bald
13 No Eyewear
14 Eyeglasses
15 Sunglasses
16 Mustache
17 Smiling
18 Frowning
19 Chubby
20 Blurry
21 Harsh Lighting
22 Flash
23 Soft Lighting
24 Outdoor
25 Curly Hair
26 Wavy Hair
27 Straight Hair
28 Receding Hairline
29 Bangs
30 Sideburns
31 Fully Visible Forehead
32 Partially Visible Forehead
33 Obstructed Forehead
34 Bushy Eyebrows
35 Arched Eyebrows
36 Narrow Eyes
37 Eyes Open
38 Big Nose
39 Pointy Nose
40 Big Lips
41 Mouth Closed
42 Mouth Slightly Open
43 Mouth Wide Open
44 Teeth Not Visible
45 No Beard
46 Goatee
47 Round Jaw
48 Double Chin
49 Wearing Hat
50 Oval Face
51 Square Face
52 Round Face
53 Color Photo
54 Posed Photo
55 Attractive Man
56 Attractive Woman
57 Indian
58 Gray Hair
59 Bags Under Eyes
60 Heavy Makeup
61 Rosy Cheeks
62 Shiny Skin
63 Pale Skin
64 5 o Clock Shadow
65 Strong Nose-Mouth Lines
66 Wearing Lipstick
67 Flushed Face
68 High Chee

In [1]:
import pickle
import numpy as np

from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# ---------------------------------------------------
# 1. Load dataset and choose target / protected pair
# ---------------------------------------------------

# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only open this file if it comes from a trusted source.
with open("dataset_description.pkl", "rb") as f:
    dc = pickle.load(f)

X = dc["latent_vars"]        # feature matrix, one row per sample (shape printed below)
attrs = dc["attributes"]     # attribute values, one column per entry of `fields`
fields = dc["fields"]        # attribute names; position i names column i of `attrs`

print("Loaded dataset_description.pkl")
print("X shape:", X.shape)
print("attrs shape:", attrs.shape)
print("Number of attributes:", len(fields))

# Choose the target and protected attributes (both must appear in `fields`,
# otherwise fields.index below raises ValueError).
target_name = "Frowning"   # label to predict
protected_name = "Indian"            # protected attribute

# Column positions of the two attributes inside `attrs`.
j_y = fields.index(target_name)
j_g = fields.index(protected_name)

# Binary label and protected group: strictly positive value -> 1, else 0
# (assumes a positive attribute value means "attribute present" - TODO confirm
# against the dataset documentation).
y = (attrs[:, j_y] > 0).astype(int)
g = (attrs[:, j_g] > 0).astype(int)

print(f"\nTarget attribute = {target_name}")
print(f"Protected attribute = {protected_name}")
print("Positive rate (y=1): {:.3f}".format(y.mean()))
print("Group 1 fraction (g=1): {:.3f}".format(g.mean()))

# ---------------------------------------------------
# 2. Train / Audit / Test split
# ---------------------------------------------------

# 60% train / 40% held out. Stratification is on the label y only, not on the
# protected group g, so the number of group-1 samples per split is not controlled.
X_train, X_tmp, y_train, y_tmp, g_train, g_tmp = train_test_split(
    X, y, g, test_size=0.4, random_state=0, stratify=y
)

# Held-out part is halved into an audit set (used to fit the auditors) and a
# test set (used only for reporting).
X_audit, X_test, y_audit, y_test, g_audit, g_test = train_test_split(
    X_tmp, y_tmp, g_tmp, test_size=0.5, random_state=1, stratify=y_tmp
)

print("\nDataset sizes:")
print("  Train: ", X_train.shape[0])
print("  Audit: ", X_audit.shape[0])
print("  Test  : ", X_test.shape[0])

# ---------------------------------------------------
# 3. Helper: metrics and multiaccuracy update
# ---------------------------------------------------

def report_metrics(name, y_true, p, g=None):
    """
    Print accuracy of thresholded predictions, overall and per protected group.

    Probabilities are turned into hard labels with a 0.5 cut-off. When `g` is
    given, accuracy is also printed for each non-empty group (0 and 1), and
    the absolute gap is printed if both groups are present. Nothing is returned.

    Args:
        name: Label printed in the report header.
        y_true: True labels (0/1), shape (n,)
        p: Predicted probabilities in [0,1], shape (n,)
        g: Protected group labels (0/1), shape (n,) or None
    """
    hard_preds = (p >= 0.5).astype(int)
    overall = accuracy_score(y_true, hard_preds)
    print(f"=== {name} ===")
    print(f"Accuracy: {overall:.3f}")

    # No group information: finish with the trailing blank line.
    if g is None:
        print()
        return

    per_group = {}
    for label in (0, 1):
        members = (g == label)
        n_members = members.sum()
        if n_members == 0:
            # Empty group: no accuracy to report.
            continue
        group_acc = accuracy_score(y_true[members], hard_preds[members])
        per_group[label] = group_acc
        print(f"  Group {label} accuracy ({n_members} samples): {group_acc:.3f}")

    # The gap is only defined when both groups had at least one sample.
    if len(per_group) == 2:
        gap = abs(per_group[0] - per_group[1])
        print(f"  |accuracy gap| (group 0 vs 1): {gap:.3f}")
    print()


def multiaccuracy_update(probs, h_vals, eta):
    """
    Multiaccuracy multiplicative-weights-style update on probabilities.

    Probabilities are interpreted as sigmoid(logits). The logits are shifted
    by -eta * h(x) and mapped back through a sigmoid.

    Args:
        probs: Current probabilities in [0,1], shape (n,) (array-like)
        h_vals: Auditor outputs h_t(x), shape (n,) (array-like)
        eta: Learning rate for the update

    Returns:
        Updated probabilities in [0,1], shape (n,)
    """
    # Clip away from 0 and 1 so the logit below stays finite.
    eps = 1e-6
    p = np.clip(np.asarray(probs, dtype=float), eps, 1 - eps)

    # logit(p) = log(p / (1-p))
    logits = np.log(p / (1.0 - p))

    # Move logits in the opposite direction of the auditor prediction.
    # np.asarray lets plain Python lists be passed for h_vals.
    new_logits = logits - eta * np.asarray(h_vals, dtype=float)

    # Numerically stable sigmoid: only exp(-|z|) <= 1 is ever evaluated, so a
    # very negative logit can no longer overflow exp() (the naive
    # 1 / (1 + exp(-z)) emits a RuntimeWarning there).
    decay = np.exp(-np.abs(new_logits))
    return np.where(new_logits >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


def run_non_dp_multiaccuracy_boost(
    X_train, y_train,
    X_audit, y_audit,
    X_test, y_test,
    base_model,
    T=5,
    eta=0.5,
    auditor_lr=0.05,
    auditor_steps=200,
    stop_threshold=0.002,
    seed=0,
):
    """
    Non-DP multiaccuracy boosting (no privacy, exact correlation).

    We:
      1) Start from base_model probabilities on audit and test.
      2) At each round t:
         - Fit a linear regression auditor to residuals on the audit set.
         - Compute exact correlation delta_t = E[h_t(x)*(f_t(x)-y)] on audit.
         - If |delta_t| < stop_threshold, stop.
         - Otherwise update f_t on both audit and test using multiaccuracy_update.

    The procedure is fully deterministic: the auditor is a closed-form
    least-squares fit, so no random number generator is created.

    Args:
        X_train, y_train: training data (unused here; base_model is fitted outside)
        X_audit, y_audit: audit set
        X_test, y_test: test set; y_test is unused here, only X_test is scored
        base_model: already-fitted probabilistic classifier with predict_proba
        T: maximum number of boosting rounds
        eta: multiaccuracy learning rate
        auditor_lr: (unused here; kept to mirror DP version signature)
        auditor_steps: (unused here; kept to mirror DP version signature)
        stop_threshold: stop if |delta_t| < this value
        seed: (unused here; kept to mirror DP version signature)

    Returns:
        dict with:
            "fT_audit": final probabilities on audit
            "fT_test" : final probabilities on test
            "history" : dict with "delta" (exact correlations per round,
                        including the round that triggered early stopping)
    """
    # Start from base-model probabilities for the positive class.
    f_audit = base_model.predict_proba(X_audit)[:, 1]
    f_test = base_model.predict_proba(X_test)[:, 1]

    deltas = []

    for t in range(T):
        # Residuals of the current predictor on the audit set, shape (n_audit,)
        residuals = f_audit - y_audit

        # Fit a linear regression auditor h_t(x) ~ residual (non-DP, closed form).
        auditor = LinearRegression()
        auditor.fit(X_audit, residuals)

        # Auditor predictions on audit and test
        h_audit = auditor.predict(X_audit)
        h_test = auditor.predict(X_test)

        # Exact (in-sample) correlation between auditor output and residuals.
        delta_t = np.mean(h_audit * residuals)
        deltas.append(delta_t)

        print(f"[Non-DP MA] Round {t}: correlation delta_t = {delta_t:.6f}")

        # Stop once the auditor no longer finds a meaningful correlation; the
        # predictions are left as they were before this round.
        if abs(delta_t) < stop_threshold:
            print(f"Stopping early at round {t} (|delta_t| < {stop_threshold}).")
            break

        # Apply the same auditor-driven correction to audit and test predictions.
        f_audit = multiaccuracy_update(f_audit, h_audit, eta)
        f_test = multiaccuracy_update(f_test, h_test, eta)

    return {
        "fT_audit": f_audit,
        "fT_test": f_test,
        "history": {
            "delta": deltas,
        },
    }

# ---------------------------------------------------
# 4. Base model (no multiaccuracy, no DP)
# ---------------------------------------------------

base = LogisticRegression(max_iter=1000, solver="lbfgs")
base.fit(X_train, y_train)

p_test_base = base.predict_proba(X_test)[:, 1]
report_metrics("Base model", y_test, p_test_base, g_test)

# ---------------------------------------------------
# 5. Non-DP multiaccuracy boosting
# ---------------------------------------------------

# Shared hyperparameters: the DP run below reuses exactly these values so the
# two post-processed models are comparable.
T = 5               # max number of boosting rounds
eta = 0.2           # multiaccuracy learning rate
stop_threshold = 0.002

results_ma = run_non_dp_multiaccuracy_boost(
    X_train, y_train,
    X_audit, y_audit,
    X_test, y_test,
    base_model=base,
    T=T,
    eta=eta,
    auditor_lr=0.05,       # kept for symmetry with DP version
    auditor_steps=200,
    stop_threshold=stop_threshold,
    seed=0,
)

p_test_ma = results_ma["fT_test"]
report_metrics("Non-DP multiaccuracy (post-processed)", y_test, p_test_ma, g_test)

print("Exact correlations per round (non-DP):", results_ma["history"]["delta"])
print()

# ---------------------------------------------------
# 6. DP multiaccuracy boosting (using your DP code)
# ---------------------------------------------------
# NOTE(review): project-local import kept next to its only use; consider
# moving it to the import block at the top of the cell.
from dp_multiaccuracy_utils import run_dp_multiaccuracy_boost


epsilon_round = 1      # per-round epsilon
# Per-round delta. In (epsilon, delta)-DP, delta bounds the probability of an
# unbounded privacy failure, so delta = 1 gives no guarantee at all. It should
# be well below 1 / n_audit (the audit split has 2629 rows in this run).
delta_round = 1e-5

results_dp = run_dp_multiaccuracy_boost(
    X_train, y_train,
    X_audit, y_audit,
    X_test, y_test,
    base_model=base,
    T=T,
    eta=eta,
    epsilon_round=epsilon_round,
    delta_round=delta_round,
    clipping_grad=1.0,
    clipping_corr=1.0,
    auditor_lr=0.05,
    auditor_steps=200,
    auditor_batch_size=256,
    stop_threshold=stop_threshold,
    seed=0,
)

p_test_dp = results_dp["fT_test"]
report_metrics("DP multiaccuracy (post-processed)", y_test, p_test_dp, g_test)

print("Noisy correlation estimates per round (DP):", results_dp["history"]["delta_hat"])



Loaded dataset_description.pkl
X shape: (13143, 100)
attrs shape: (13143, 73)
Number of attributes: 73

Target attribute = Frowning
Protected attribute = Indian
Positive rate (y=1): 0.581
Group 1 fraction (g=1): 0.023

Dataset sizes:
  Train:  7885
  Audit:  2629
  Test  :  2629
=== Base model ===
Accuracy: 0.713
  Group 0 accuracy (2573 samples): 0.714
  Group 1 accuracy (56 samples): 0.643
  |accuracy gap| (group 0 vs 1): 0.071

[Non-DP MA] Round 0: correlation delta_t = 0.010194
[Non-DP MA] Round 1: correlation delta_t = 0.009435
[Non-DP MA] Round 2: correlation delta_t = 0.008732
[Non-DP MA] Round 3: correlation delta_t = 0.008081
[Non-DP MA] Round 4: correlation delta_t = 0.007479
=== Non-DP multiaccuracy (post-processed) ===
Accuracy: 0.711
  Group 0 accuracy (2573 samples): 0.713
  Group 1 accuracy (56 samples): 0.625
  |accuracy gap| (group 0 vs 1): 0.088

Exact correlations per round (non-DP): [0.010194279930917071, 0.009435071570672604, 0.008731967457888604, 0.008081043025248