In [424]:
import numpy as np
import pandas as pd
import pickle
import yaml
import sys
from audit import exp_one_acc, exp_all_acc, exp_worst_eps, exp_estimated_epsilon, exp_all_avg_acc, exp_all_group_avg_acc
from visual import *
from util import *
import json

In [425]:
def get_config(dataset_name, model_name):
    """Load the "regular" YAML configuration for a dataset/model pair.

    Runs with ``model_name == "LR"`` on any dataset other than ``"mnist"``
    read their config from the ``configs/tabular/`` tree; every other
    combination reads from ``configs/<dataset_name>/``.

    Args:
        dataset_name: Dataset identifier used to build the config path.
        model_name: Model identifier; only the value ``"LR"`` affects the path.

    Returns:
        The object that ``yaml.load`` builds from the selected file.
    """
    uses_tabular_tree = model_name == "LR" and dataset_name != "mnist"
    if uses_tabular_tree:
        config_path = f"configs/tabular/{dataset_name}/{dataset_name}_regular.yaml"
    else:
        config_path = f"configs/{dataset_name}/{dataset_name}_regular.yaml"

    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so
    # only point this at trusted, project-owned config files.
    with open(config_path, "rb") as stream:
        return yaml.load(stream, Loader=yaml.Loader)


## get data
def _mean_group_advantage(group_accuracies):
    """Map each accuracy ``a`` to ``2 * a - 1`` and average within each group."""
    return [
        np.mean([2 * accuracy - 1 for accuracy in group])
        for group in group_accuracies
    ]


def get_one_seed(dataset_name, model_name, method, epsilon, other, seed, configs):
    """Compute per-group mean advantages for a single run directory.

    The run directory is
    ``<configs["run"]["log_dir"]><model_name>/<method>/eps<epsilon><seed>``;
    ``memberships.npy`` and ``loss_scores.npy`` are read from its ``report``
    sub-directory.

    Args:
        dataset_name: Dataset identifier used to locate the pickled dataset.
        model_name: Model identifier (part of the run directory).
        method: Training-method identifier (part of the run directory).
        epsilon: Value interpolated into the ``eps...`` directory name.
        other: When truthy, also evaluate ``exp_all_avg_acc`` and
            ``exp_all_group_avg_acc``; otherwise those results stay empty.
        seed: Suffix appended verbatim to the run directory name.
        configs: Loaded configuration; ``configs["run"]["log_dir"]`` and
            ``configs["train"]["num_groups"]`` are read.

    Returns:
        ``(alooa_adv_group, ga_adv_group, gba_adv_group)``: one mean advantage
        per group for each evaluation. The last two are empty lists when
        ``other`` is falsy.
    """
    log_root = configs["run"]["log_dir"]
    report_dir = f"{log_root}{model_name}/{method}/eps{epsilon}{seed}/report"
    num_group = configs["train"]["num_groups"]

    # The pickled dataset location depends on the number of groups.
    if num_group == 2:
        dataset_path = f"data/tabular/{dataset_name}.pkl"
    elif num_group == 5:
        dataset_path = f"data/{dataset_name}.pkl"
    else:
        dataset_path = f"{log_root}{dataset_name}.pkl"

    # NOTE(review): pickle.load can execute arbitrary code; only load
    # dataset files produced by this project.
    with open(dataset_path, "rb") as file:
        dataset = pickle.load(file)

    memberships = np.load(f"{report_dir}/memberships.npy")
    loss_scores = np.load(f"{report_dir}/loss_scores.npy")

    # All three evaluations share the same arguments.
    eval_args = (f"{report_dir}/loss", loss_scores, memberships, dataset, num_group)

    alooa_adv_group = _mean_group_advantage(exp_all_acc(*eval_args))

    ga_adv_group, gba_adv_group = [], []
    if other:
        ga_adv_group = _mean_group_advantage(exp_all_avg_acc(*eval_args))
        gba_adv_group = _mean_group_advantage(exp_all_group_avg_acc(*eval_args))

    return alooa_adv_group, ga_adv_group, gba_adv_group


def get_all_seeds(dataset_name, model_name, method, epsilon, other=True):
    """Gather per-group mean advantages across all seed runs.

    Loads the configuration once with ``get_config`` and calls
    ``get_one_seed`` for each run-directory suffix ``"_1"``, ``"_12"``,
    ``"_123"``, ``"_1234"`` and ``""`` (no suffix).

    Args:
        dataset_name: Dataset identifier.
        model_name: Model identifier.
        method: Training-method identifier.
        epsilon: Value used in the ``eps...`` run directory name.
        other: Forwarded to ``get_one_seed``.

    Returns:
        Three ``np.ndarray`` objects (alooa, ga, gba) with one row per seed
        run. When ``other`` is falsy the ga/gba rows are empty lists.
    """
    configs = get_config(dataset_name, model_name)
    seed_suffixes = ("_1", "_12", "_123", "_1234", "")

    per_seed = [
        get_one_seed(dataset_name, model_name, method, epsilon, other, suffix, configs)
        for suffix in seed_suffixes
    ]

    alooa_rows = [result[0] for result in per_seed]
    ga_rows = [result[1] for result in per_seed]
    gba_rows = [result[2] for result in per_seed]

    return np.array(alooa_rows), np.array(ga_rows), np.array(gba_rows)

In [502]:
# Experiment selection consumed by the get_all_seeds(...) call further down.
# dataset_name and model_name pick the YAML config (see get_config);
# model_name, method and epsilon form the run directory
# "<log_dir><model_name>/<method>/eps<epsilon><seed>" (see get_one_seed).
dataset_name = "raceface"
model_name = "CNN"
method = "regular"
epsilon = 0

In [503]:
def get_delta_mean_std(adv):
    """Summarise the max-min gap across groups over several runs.

    For each row of ``adv`` the gap ``max - min`` along axis 1 is taken and
    multiplied by 100; the mean and (population, ``ddof=0``) standard
    deviation of those gaps along axis 0 are returned.

    Args:
        adv: 2-D array-like; rows are reduced along axis 1. In this notebook
            it is the output of ``get_all_seeds`` (one row per seed run).

    Returns:
        ``(mean_delta, std_delta)``. Both are ``nan`` when axis 1 is empty,
        e.g. the ga/gba arrays returned by ``get_all_seeds(..., other=False)``.
    """
    adv = np.asarray(adv)

    # np.max/np.min have no identity for a zero-size reduction and would
    # raise ValueError on an (n, 0) array. Report "no data" as nan instead,
    # which matches what an input with zero rows already produces.
    if adv.ndim == 2 and adv.shape[1] == 0:
        return np.nan, np.nan

    delta = (np.max(adv, axis=1) - np.min(adv, axis=1)) * 100
    mean_delta = np.mean(delta, axis=0)
    std_delta = np.std(delta, axis=0)
    return mean_delta, std_delta

# Collect per-seed, per-group mean advantages for the selected experiment.
# With other=False, get_one_seed skips the ga/gba evaluations and returns
# empty lists for them, so only alooa_adv carries values here.
alooa_adv, ga_adv, gba_adv = get_all_seeds(dataset_name, model_name, method, epsilon, other=False)

# The ga/gba summaries are disabled; presumably they are meant to be
# re-enabled together with other=True in the call above.
# ga_delta_mean, ga_delta_std = get_delta_mean_std(ga_adv)
# print(rf"ga: {ga_delta_mean:.4f} $\pm$ {ga_delta_std:.4f}")
# gba_delta_mean, gba_delta_std = get_delta_mean_std(gba_adv)
# print(rf"gba: {gba_delta_mean:.4f} $\pm$ {gba_delta_std:.4f}")
# Mean and standard deviation, across seeds, of the max-min group gap
# (already scaled by 100 inside get_delta_mean_std). The raw f-string
# emits "$\pm$" literally.
alooa_delta_mean, alooa_delta_std = get_delta_mean_std(alooa_adv)
print(rf"alooa: {alooa_delta_mean:.4f} $\pm$ {alooa_delta_std:.4f}")


alooa: 1.7464 $\pm$ 0.1085


In [428]:
# alooa_delta_mean, alooa_delta_std = get_delta_mean_std(alooa_adv)
# print(rf"alooa: {alooa_delta_mean:.4f} $\pm$ {alooa_delta_std:.4f}")
# # Test accuracy
# configs = get_config(dataset_name, model_name)
# path = f"{configs["train"]["log_dir"]}{model_name}/{method}/eps{epsilon}"
# with open(f"{path}/models/models_metadata.json", "r") as file:
#     data = json.load(file)

# def get_mean_std(name):
#     values = [entry[name] for entry in data.values()]
#     mean_value = np.mean(values) 
#     std_value = np.std(values)
#     return mean_value, std_value

# # test_accs = [entry["test_acc"] for entry in data.values()]
# # mean_test_acc = np.mean(test_accs) * 100
# # std_test_acc = np.std(test_accs)
# # print(f"Mean test accuracy: {mean_test_acc:.2f}")
# # print(f"Standard deviation: {std_test_acc:.4f}")

# acc_mean, acc_std = get_mean_std("test_acc")
# print(rf"test accuracy: {acc_mean:.4f} $\pm$ {acc_std:.4f}")
# ap_mean, ap_std = get_mean_std("accuracy_parity")
# print(rf"accuracy_parity: {ap_mean:.4f} $\pm$ {ap_std:.4f}")
# dp_mean, dp_std = get_mean_std("demographic_parity")
# print(rf"demographic_parity: {dp_mean:.4f} $\pm$ {dp_std:.4f}")
# eop_mean, eop_std = get_mean_std("equal_opportunity")
# print(rf"equal_opportunity: {eop_mean:.4f} $\pm$ {eop_std:.4f}")
# eod_mean, eod_std = get_mean_std("equalized_odds")
# print(rf"equalized_odds: {eod_mean:.4f} $\pm$ {eod_std:.4f}")


In [429]:
# """
# Can be used when pa-alooa is run multiple times, which yields many group_adv_mean values.
# """
# # Kruskal-Wallis 
# from scipy.stats import kruskal

# stat, p = kruskal(*all_adv_acc)
# print(f"statistic: {stat}, p-value: {p}")

# # Mann-Whitney U 
# from scipy.stats import mannwhitneyu

# stat, p = mannwhitneyu(all_adv_acc[1], all_adv_acc[5], alternative='two-sided')
# print(f"statistic: {stat}, p-value: {p}")

### one seed

In [430]:
# # configs = "configs/utkface/utkface_regular.yaml"
# # configs = "configs/raceface/raceface_regular.yaml"
# configs = "configs/mnist/mnist_regular.yaml"
# # configs = "configs/tabular/bank/bank_regular.yaml"
# with open(configs, "rb") as f:
#     configs = yaml.load(f, Loader=yaml.Loader)

# dataset_name = configs["data"]["dataset"]
# num_group = configs["train"]["num_groups"]
# model_name = "CNN"
# epsilon = 10
# method = "regular"
# seed = ""


# subdata_dir = f'{configs["run"]["log_dir"]}{dataset_name}.pkl'
# # exp_dir = f'{configs['run']['log_dir']}{model_name}/{method}/data/data_{data_idx}/eps{epsilon}'
# exp_dir = f'{configs["run"]["log_dir"]}{model_name}/{method}/eps{epsilon}{seed}'
# log_dir = exp_dir
# directories = {
#     "log_dir": log_dir,
#     "report_dir": f"{log_dir}/report",
#     "signal_dir": f"{log_dir}/signals",
# }

# path = subdata_dir
# if num_group == 2:
#     path = f"data/tabular/{dataset_name}.pkl"
# elif num_group == 5:
#     path = f"data/{dataset_name}.pkl"
# with open(path, "rb") as file:
#     dataset = pickle.load(file)

# path = directories["report_dir"]
# memberships = np.load(f"{path}/memberships.npy")
# loss_scores = np.load(f"{path}/loss_scores.npy")


# alooa_acc = exp_all_acc(f"{directories['report_dir']}/loss", loss_scores, memberships, dataset, num_group)
# alooa_acc_group = [np.mean([value for value in group]) for group in alooa_acc]
# alooa_adv = [[2*value-1 for value in group] for group in alooa_acc]
# alooa_adv_group = [np.mean([value for value in group]) for group in alooa_adv]

# # ga_acc = exp_all_avg_acc(f"{directories['report_dir']}/loss", loss_scores, memberships, dataset, num_group)
# # ga_acc_group = [np.mean([value for value in group]) for group in ga_acc]
# # ga_adv = [[2*value-1 for value in group] for group in ga_acc]
# # ga_adv_group = [np.mean([value for value in group]) for group in ga_adv]

# # gba_acc = exp_all_group_avg_acc(f"{directories['report_dir']}/loss", loss_scores, memberships, dataset, num_group)
# # gba_acc_group = [np.mean([value for value in group]) for group in gba_acc]
# # gba_adv = [[2*value-1 for value in group] for group in gba_acc]
# # gba_adv_group = [np.mean([value for value in group]) for group in gba_adv]

In [431]:
# print((max(alooa_adv_group) - min(alooa_adv_group)) *100)
# print((max(ga_adv_group) - min(ga_adv_group)) *100)
# print((max(gba_adv_group) - min(gba_adv_group)) *100)

In [432]:
# print(alooa_acc_group)
# print(ga_acc_group)
# print(gba_acc_group)

In [433]:
# group_mean = [np.mean([value for value in group]) for group in all_acc]
# print(group_mean)
# v = (max(group_mean) - min(group_mean)) *100
# print(f"{v:.3f}")

In [434]:
# group_adv_mean = [np.mean([value for value in group]) for group in all_adv_acc]
# print(group_adv_mean)
# v = (max(group_adv_mean) - min(group_adv_mean)) *100
# print(f"{v:.3f}")