In [1]:
import warnings
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# modelling
from sklearn.metrics import roc_auc_score
from sklearn.exceptions import UndefinedMetricWarning

# custom
from datasets import IMB_DATASETS, load_data, prepare_data
from attacks import membership_attack

In [2]:
# Silence the two warning categories that would otherwise flood the cell output.
for _ignored_category in (FutureWarning, UndefinedMetricWarning):
    warnings.simplefilter("ignore", category=_ignored_category)

In [3]:
# Seed NumPy's legacy global generator so code drawing from np.random is repeatable.
np.random.seed(seed=42)

In [4]:
# Train / test sizes handed to membership_attack, and their total.
N_TRAIN, N_TEST = 200, 100
N_ALL = N_TEST + N_TRAIN

# Upper bound on the number of minority-class targets attacked per dataset.
N_TARGETS = 100

# Forwarded to the attack's generator as k_neighbors and sampling_strategy.
K, SAMPLING_STRATEGY = 5, 1

In [5]:
# Maps each dataset name to the list of attack AUCs, one entry per attacked target.
all_aucs = {}

for data_name in tqdm(IMB_DATASETS, leave=False):
    X, y = load_data(data_name)
    X, y = prepare_data(X, y)

    # Candidate targets are the indices 0 .. n_minority - 1, where n_minority
    # counts the labels equal to 1.
    # NOTE(review): this presumes the minority rows come first after prepare_data -- confirm.
    n_minority = (y == 1).sum()
    minority_targets_idx = range(n_minority)
    if n_minority > N_TARGETS:
        # Too many candidates: draw N_TARGETS of them without replacement,
        # using a freshly seeded generator for every dataset.
        rng = np.random.default_rng(seed=42)
        minority_targets_idx = rng.choice(minority_targets_idx, size=N_TARGETS, replace=False)

    # Run one membership attack per target and score it with ROC AUC.
    aucs = []
    for target_idx in tqdm(minority_targets_idx, leave=False):
        attack_labels, attack_scores = membership_attack(
            X,
            y,
            target_idx,
            N_TRAIN,
            N_TEST,
            attack_mode="gen",
            gen_kwargs={"k_neighbors": K, "sampling_strategy": SAMPLING_STRATEGY},
        )
        aucs.append(roc_auc_score(attack_labels, attack_scores))

    all_aucs[data_name] = aucs

    # Ascending sort, so the last ten entries are the ten highest AUCs.
    aucs_10 = np.sort(aucs)[-10:]
    print(f"{data_name}: top 10 aucs -- {aucs_10.mean():.4f} +- {aucs_10.std():.4f}")


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

ecoli: top 10 aucs -- 0.9941 +- 0.0082


  0%|          | 0/51 [00:00<?, ?it/s]

yeast_me2: top 10 aucs -- 1.0000 +- 0.0000


  0%|          | 0/68 [00:00<?, ?it/s]

solar_flare_m0: top 10 aucs -- 1.0000 +- 0.0000


  0%|          | 0/100 [00:00<?, ?it/s]

abalone: top 10 aucs -- 0.7542 +- 0.0702


  0%|          | 0/100 [00:00<?, ?it/s]

car_eval_34: top 10 aucs -- 0.9887 +- 0.0036


  0%|          | 0/65 [00:00<?, ?it/s]

car_eval_4: top 10 aucs -- 1.0000 +- 0.0000


  0%|          | 0/100 [00:00<?, ?it/s]

mammography: top 10 aucs -- 0.9837 +- 0.0167


  0%|          | 0/32 [00:00<?, ?it/s]

abalone_19: top 10 aucs -- 1.0000 +- 0.0000


In [None]:
# Summarise the attack AUCs per dataset for several "top-N" cut-offs.
for top in [10, 50, 100]:
    columns = ["dataset", "auc"]

    # Collect one [dataset, "mean +- std"] row per dataset and build the frame in
    # a single call, rather than growing it with pd.concat on every iteration.
    rows = []
    for data_name in IMB_DATASETS:
        # Ascending sort, so the tail holds the `top` highest AUCs. When fewer
        # than `top` AUCs were recorded, the slice returns all of them.
        aucs_top = np.sort(all_aucs[data_name])[-top:]
        rows.append([f"{data_name}", f"{aucs_top.mean():.4f} +- {aucs_top.std():.4f}"])

    scores_df = pd.DataFrame(rows, columns=columns)

    print(scores_df)
    # Optional export of each table:
    # scores_df.to_csv(f"results/synth/mia_{top}.csv", index=False)

          dataset               auc
0           ecoli  0.9941 +- 0.0082
1       yeast_me2  1.0000 +- 0.0000
2  solar_flare_m0  1.0000 +- 0.0000
3         abalone  0.7542 +- 0.0702
4     car_eval_34  0.9887 +- 0.0036
5      car_eval_4  1.0000 +- 0.0000
6     mammography  0.9837 +- 0.0167
7      abalone_19  1.0000 +- 0.0000
          dataset               auc
0           ecoli  0.9332 +- 0.0525
1       yeast_me2  0.9906 +- 0.0119
2  solar_flare_m0  0.9986 +- 0.0018
3         abalone  0.6462 +- 0.0673
4     car_eval_34  0.9694 +- 0.0133
5      car_eval_4  0.9993 +- 0.0006
6     mammography  0.9141 +- 0.0448
7      abalone_19  0.9999 +- 0.0003
          dataset               auc
0           ecoli  0.9332 +- 0.0525
1       yeast_me2  0.9890 +- 0.0165
2  solar_flare_m0  0.9938 +- 0.0122
3         abalone  0.5878 +- 0.0775
4     car_eval_34  0.9311 +- 0.0576
5      car_eval_4  0.9945 +- 0.0130
6     mammography  0.8512 +- 0.0750
7      abalone_19  0.9999 +- 0.0003
