In [None]:
import numpy as np
import os
from sklearn.metrics import roc_auc_score

# Evaluation grid: where the saved confidence arrays live, and which
# detector / manipulation-method / race combinations are evaluated.
root_path = "./saved_confidences"

models = [
    "xception",
    "recce",
    "ucf",
    "capsule_net",
    "ffd",
    "core",
    "f3net",
    "spsl",
    "srm",
    "dag_fdd",
    "daw_fdd",
    "fair_df_detector",
]

# "data" is the only entry labelled 0 by the evaluation cells; every other
# method is labelled 1. Keep "data" first: the fairness metric helpers read
# row 0 of the per-method result matrix as the real split.
methods = [
    "data",
    "FaceSwap",
    "SimSwap",
    "FastReen",
    "Dual_Generator_Face_Reen",
    "MaskGan",
    "StarGAN",
    "StyGAN",
    "SDSwap",
    "DCFace",
    "Face2Diffusion",
    "FSRT",
]

races = [
    "Caucasian",
    "Asian",
    "African",
    "Indian",
]

def get_filepath(root_path, model, method, race):
    """Return the path of the saved array for one model/method/race triple.

    The file is expected at ``<root_path>/<model>/<method>_<race>.npy``.
    """
    filename = "_".join((method, race)) + ".npy"
    return os.path.join(root_path, model, filename)

In [None]:
# get AUC

def calculate_auc_for_dataset(root_path, model, methods, races):
    """Compute one ROC AUC for ``model`` over all available method/race files.

    Every existing ``<method>_<race>.npy`` file under the model's folder is
    loaded; its entries are labelled 0 when ``method == "data"`` and 1
    otherwise. Scores and labels from all files are pooled before scoring.

    Args:
        root_path: Directory holding one sub-folder per model.
        model: Name of the model sub-folder.
        methods: Iterable of method names.
        races: Iterable of race names.

    Returns:
        The value returned by ``roc_auc_score`` on the pooled labels/scores.

    Raises:
        ValueError: If none of the expected files exists.
    """
    score_chunks, label_chunks = [], []

    for race in races:
        for method in methods:
            path = get_filepath(root_path, model, method, race)
            if not os.path.exists(path):
                # Missing combinations are skipped, not treated as errors.
                continue
            scores = np.load(path)
            target = 1 if method != "data" else 0
            score_chunks.append(scores)
            label_chunks.append(np.full(scores.shape[0], target))

    if not score_chunks:
        raise ValueError("No confidence scores loaded.")

    pooled_scores = np.concatenate(score_chunks)
    pooled_labels = np.concatenate(label_chunks)
    return roc_auc_score(pooled_labels, pooled_scores)

# Displaying the results
for model in models:
    try:
        auc_score = calculate_auc_for_dataset(root_path, model, methods, races)
    except ValueError as err:
        # calculate_auc_for_dataset never returns None; it raises ValueError
        # when no score file is found for the model. Report that model and
        # keep going so one missing folder does not hide the other results.
        print(f"AUC for model {model}: unavailable ({err})")
        continue
    print(f"AUC for model {model}: {auc_score:.4f}")
        

In [None]:
# get ACC
from sklearn.metrics import accuracy_score
def calculate_acc_for_dataset(root_path, model, methods, races, threshold=0.5):
    """Compute one accuracy for ``model`` over all available method/race files.

    This helper used to be a second definition of ``calculate_auc_for_dataset``;
    it returned accuracy under an AUC name and shadowed the real AUC helper
    defined earlier in the notebook. It now has its own name.

    Every existing ``<method>_<race>.npy`` file under the model's folder is
    loaded; its entries are labelled 0 when ``method == "data"`` and 1
    otherwise. Scores from all files are pooled, thresholded, and compared
    with the pooled labels.

    Args:
        root_path: Directory holding one sub-folder per model.
        model: Name of the model sub-folder.
        methods: Iterable of method names.
        races: Iterable of race names.
        threshold: Scores greater than or equal to this value are predicted
            as label 1. Defaults to 0.5, the previously hard-coded cutoff.

    Returns:
        The value returned by ``accuracy_score`` on the pooled data.

    Raises:
        ValueError: If none of the expected files exists.
    """
    all_confidences = []
    all_labels = []

    for race in races:
        for method in methods:
            filepath = get_filepath(root_path, model, method, race)
            if not os.path.exists(filepath):
                # Missing combinations are skipped, not treated as errors.
                continue
            confidences = np.load(filepath)
            label = 0 if method == "data" else 1
            all_confidences.append(confidences)
            all_labels.append(np.full(confidences.shape[0], label))

    if not all_confidences:
        raise ValueError("No confidence scores loaded.")

    # Concatenate all confidence scores and labels
    all_confidences = np.concatenate(all_confidences)
    all_labels = np.concatenate(all_labels)

    # Calculate ACC
    predicted_labels = (all_confidences >= threshold).astype(int)
    return accuracy_score(all_labels, predicted_labels)


# Displaying the results
for model in models:
    try:
        accuracy = calculate_acc_for_dataset(root_path, model, methods, races)
    except ValueError as err:
        # The helper raises (it never returns None) when no score file is
        # found; report that model and continue with the remaining ones.
        print(f"ACC for model {model}: unavailable ({err})")
        continue
    print(f"ACC for model {model}: {accuracy:.4f}")


In [None]:
# Metric
def calculate_naive_metric(data, metric="DPD"):
    """Print and return a gap metric computed on row-averaged results.

    Rows are averaged first and the spread across columns is measured on the
    averaged vector. Row 0 is treated as the real split and rows 1 onward as
    the fake splits (the benchmark cell of this notebook builds ``data`` with
    one row per entry of ``methods`` and one column per entry of ``races``).

    Args:
        data: 2-D array-like of per-row, per-column results.
        metric: One of
            "DPD"    - range (max - min) of the mean over all rows;
            "DEOdds" - average of the range of row 0 and the range of the
                       mean over rows 1 onward;
            "DEO"    - range of the mean over rows 1 onward;
            "STD"    - standard deviation of the mean over all rows.

    Returns:
        The metric as a float (also printed with four decimals), or None
        after printing "No such metric. " for an unknown ``metric``.
    """
    # Each aggregate is computed only in the branch that needs it, so e.g.
    # "DPD" on a single-row input does not evaluate the mean of an empty
    # fake slice.
    if metric == "DPD":
        value = np.ptp(np.mean(data, axis=0))
    elif metric == "DEOdds":
        real_result = data[0]
        fake_result = np.mean(data[1:], axis=0)
        value = 0.5 * (np.ptp(real_result) + np.ptp(fake_result))
    elif metric == "DEO":
        value = np.ptp(np.mean(data[1:], axis=0))
    elif metric == "STD":
        value = np.std(np.mean(data, axis=0))
    else:
        print("No such metric. ")
        return None

    print(f"{value:.4f}")
    return float(value)

def calculate_AA_metric(data, metric="DPD"):
    """Print and return a gap metric averaged over rows.

    The spread across columns is measured per row and the per-row spreads
    are then averaged. Row 0 is treated as the real split and rows 1 onward
    as the fake splits (the benchmark cell of this notebook builds ``data``
    with one row per entry of ``methods`` and one column per entry of
    ``races``).

    Args:
        data: 2-D array-like of per-row, per-column results.
        metric: One of
            "DPD"    - mean over all rows of the per-row range (max - min);
            "DEOdds" - average of the range of row 0 and the mean per-row
                       range of rows 1 onward;
            "DEO"    - mean per-row range of rows 1 onward;
            "STD"    - mean over all rows of the per-row standard deviation.

    Returns:
        The metric as a float (also printed with four decimals), or None
        after printing "No such metric. " for an unknown ``metric``.
    """
    real_data = data[0]
    fake_data = data[1:]

    if metric == "DPD":
        value = np.mean(np.ptp(data, axis=1))
    elif metric == "DEOdds":
        real_gap = np.ptp(real_data)
        fake_gap = np.mean(np.ptp(fake_data, axis=1))
        value = 0.5 * (real_gap + fake_gap)
    elif metric == "DEO":
        value = np.mean(np.ptp(fake_data, axis=1))
    elif metric == "STD":
        value = np.mean(np.std(data, axis=1))
    else:
        print("No such metric. ")
        return None

    print(f"{value:.4f}")
    return float(value)

def calculate_UR_metric(data, metric="DPD"):
    """Print and return a gap metric where each row is scaled by its mean.

    The spread across columns is measured per row, divided by
    ``(row mean + 1)``, and the scaled spreads are averaged. Row 0 is treated
    as the real split and rows 1 onward as the fake splits (the benchmark
    cell of this notebook builds ``data`` with one row per entry of
    ``methods`` and one column per entry of ``races``).

    Args:
        data: 2-D array-like of per-row, per-column results.
        metric: One of
            "DPD"    - mean over all rows of scaled per-row range;
            "DEOdds" - average of the scaled range of row 0 and the mean
                       scaled range of rows 1 onward;
            "DEO"    - mean scaled range of rows 1 onward;
            "STD"    - mean over all rows of scaled per-row standard
                       deviation.

    Returns:
        The metric as a float (also printed with four decimals), or None
        after printing "No such metric. " for an unknown ``metric``.
    """
    # Per-row divisor. NOTE(review): the reason for the +1 offset is not
    # documented here; for non-negative inputs it keeps the divisor >= 1.
    avg_perf = np.mean(data, axis=1) + 1
    real_data = data[0]
    real_perf = avg_perf[0]
    fake_data = data[1:]
    fake_perf = avg_perf[1:]

    if metric == "DPD":
        value = np.mean(np.ptp(data, axis=1) / avg_perf)
    elif metric == "DEOdds":
        real_gap = np.ptp(real_data) / real_perf
        fake_gap = np.mean(np.ptp(fake_data, axis=1) / fake_perf)
        value = 0.5 * (real_gap + fake_gap)
    elif metric == "DEO":
        value = np.mean(np.ptp(fake_data, axis=1) / fake_perf)
    elif metric == "STD":
        value = np.mean(np.std(data, axis=1) / avg_perf)
    else:
        print("No such metric. ")
        return None

    print(f"{value:.4f}")
    return float(value)

In [None]:
# get ACC
# Builds acc_results[model][method][race]: the fraction of entries in each
# saved array whose thresholded value matches the expected label
# (0 for the "data" method, 1 for every other method). Combinations without
# a saved file are stored as None.
acc_results = {}
for model in models:
    acc_results[model] = {}
    for method in methods:
        acc_results[model][method] = {}
        for race in races:
            filepath = get_filepath(root_path, model, method, race)
            if not os.path.exists(filepath):
                acc_results[model][method][race] = None  # File does not exist
                continue

            data = np.load(filepath)
            labels = np.zeros(data.shape[0]) if method == "data" else np.ones(data.shape[0])

            # Assuming binary classification, threshold is 0.5
            predictions = (data >= 0.5).astype(int)
            accuracy = np.mean(predictions == labels)

            acc_results[model][method][race] = accuracy

In [None]:
# get Benchmark result
# One (n_methods x n_races) accuracy matrix per model, in the insertion order
# of acc_results, so row 0 is the first method stored for that model.
benckmark_dict = {}
for model in acc_results:
    temp_result = []
    for method in acc_results[model]:
        temp_result.append(list(acc_results[model][method].values()))
    # acc_results stores None for missing files. dtype=float converts those
    # placeholders to NaN; without it the array has object dtype and the
    # metric helpers fail with a TypeError.
    benckmark_dict[model] = np.array(temp_result, dtype=float)

for model in benckmark_dict:
    print(model)
    if np.isnan(benckmark_dict[model]).any():
        # Make the cause of any "nan" lines below explicit.
        print("Warning: some accuracy entries are missing; affected metrics print as nan.")
    for metric in ["DPD", "DEOdds", "DEO", "STD"]:
        print(metric)
        calculate_naive_metric(benckmark_dict[model], metric=metric)
        calculate_AA_metric(benckmark_dict[model], metric=metric)
        calculate_UR_metric(benckmark_dict[model], metric=metric)
    print()