# Result Analysis

In [None]:
import ast
import numpy as np
import pandas as pd
import os


def parse_metrics(metrics_str):
    """Parse ``key: value`` lines of a metrics report into a dictionary.

    Each line containing ``': '`` is split at the first occurrence of that
    separator. The value part is evaluated with ``ast.literal_eval`` so that
    numbers, dicts, lists, etc. become Python objects; values that are not
    valid literals are kept as the raw string. Lines without the separator
    are ignored. If a key repeats, the last occurrence wins.
    """
    parsed = {}

    for raw_line in metrics_str.strip().split('\n'):
        name, separator, payload = raw_line.partition(': ')
        if not separator:
            # Not a "key: value" line.
            continue
        try:
            parsed[name] = ast.literal_eval(payload)
        except (ValueError, SyntaxError):
            # Not a Python literal: keep the text as-is.
            parsed[name] = payload

    return parsed

def read_txt_files(folder):
    """Return the contents of every ``.txt`` file directly inside ``folder``.

    Files are read in sorted file-name order so the result is deterministic
    (``os.listdir`` returns entries in arbitrary order). Sub-directories are
    not searched.

    Args:
        folder: Path of the directory to scan.

    Returns:
        A list with one string per ``.txt`` file.

    Raises:
        OSError: If ``folder`` cannot be listed or a file cannot be read.
    """
    texts = []
    for file_name in sorted(os.listdir(folder)):
        if file_name.endswith(".txt"):
            # os.path.join copes with trailing separators and non-POSIX paths.
            with open(os.path.join(folder, file_name), "r") as f:
                texts.append(f.read())
    return texts

In [None]:
def get_subfolders(folder):
    """Return the paths of the directories located directly inside ``folder``.

    Each returned path is the ``path`` attribute of the ``os.scandir`` entry,
    i.e. ``folder`` joined with the directory name. Order follows
    ``os.scandir`` and is therefore not guaranteed.
    """
    directory_paths = []
    for entry in os.scandir(folder):
        if entry.is_dir():
            directory_paths.append(entry.path)
    return directory_paths


def highlight_best_and_second_best(df, column, ascending=True):
    """Return the best and second-best values of ``column``.

    Rows whose index label is ``'gold'`` or ``'full'`` are excluded from the
    ranking. Values are ranked numerically even when the cells are strings
    using a decimal comma (e.g. ``"12,500"``), so pre-formatted tables are
    not ranked lexicographically. If any string cell cannot be read as a
    number, the raw values are ranked instead.

    Args:
        df: DataFrame holding the results.
        column: Name of the column to rank.
        ascending: ``True`` when smaller is better, ``False`` when larger is
            better.

    Returns:
        ``(best_value, second_best_value)`` taken from the original cells.
        A missing entry (fewer than two candidate rows) is ``None``.
    """
    candidates = df.loc[~df.index.isin(['gold', 'full']), column]

    try:
        keys = candidates.map(
            lambda v: float(v.replace(',', '.')) if isinstance(v, str) else v
        )
    except ValueError:
        # Some string is not numeric: rank the raw values.
        keys = candidates

    # Work with positions so duplicate index labels cannot confuse the lookup.
    order = keys.reset_index(drop=True).sort_values(
        ascending=ascending, kind="mergesort"
    ).index

    ranked = [candidates.iloc[position] for position in order[:2]]
    ranked += [None] * (2 - len(ranked))
    return ranked[0], ranked[1]

def format_value(value, best_value, second_best_value):
    """Return ``value`` as text, wrapped in LaTeX emphasis when it ranks top.

    A value equal to ``best_value`` is wrapped in ``\\textbf{...}``; otherwise
    a value equal to ``second_best_value`` is wrapped in ``\\underline{...}``;
    any other value is returned as plain text. The best-value check takes
    precedence when both match.
    """
    if value == best_value:
        wrapper = "\\textbf"
    elif value == second_best_value:
        wrapper = "\\underline"
    else:
        return f"{value}"
    return f"{wrapper}{{{value}}}"

def df_to_latex_table(df, show_time=False):
    r"""Render ``df`` as the source of a LaTeX ``table*`` environment.

    One table row is produced per DataFrame row. The first cell (dataset) is
    left empty, the second is the row's index label, followed by the ``lr``
    column and the metric columns. For every metric the best value is wrapped
    in ``\textbf`` and the second best in ``\underline``; rows labelled
    ``gold``/``full`` are ignored when ranking.

    Args:
        df: DataFrame with columns ``lr``, ``$f1_{test}$``, ``$acc_{test}$``,
            ``$f1_{forget}$``, ``$acc_{forget}$``, ``MIA``, ``GUM`` and, when
            ``show_time`` is true, ``Time``.
        show_time: When ``False`` (default) the last column is emitted as the
            placeholder ``-``. When ``True`` the highlighted ``Time`` value
            is emitted instead.

    Returns:
        The LaTeX table as a single string.

    Raises:
        KeyError: If a required column is missing.
    """
    # The header has one cell per emitted row cell (dataset, method, lr and
    # seven metrics). Underscores are escaped because a bare "_" is an error
    # in LaTeX text mode.
    header = r"""\begin{table*}[]
\centering
\resizebox{\linewidth}{!}{
\begin{tabular}{l c c c c | c c | c c c | c }
\midrule\hline 

DS & Method & LR & Unseen Acc\_F1 & Unseen Acc\_Acc & Forget acc\_F1 & Forget acc\_Acc & MIA\_Forgeting Score & GUM & Unlearning Time \\
\midrule\hline \\ 
"""

    # Column name -> whether smaller values are better (ascending sort).
    ranked_columns = {
        '$f1_{test}$': False,
        '$acc_{test}$': False,
        '$f1_{forget}$': False,
        '$acc_{forget}$': False,
        'MIA': True,
        'GUM': False,
    }
    if show_time:
        ranked_columns['Time'] = True

    best = {
        column: highlight_best_and_second_best(df, column, ascending=ascending)
        for column, ascending in ranked_columns.items()
    }

    rows = []
    for method, row in df.iterrows():
        # Apply bold/underline markup to the best / second-best cells.
        cells = {
            column: format_value(row[column], *best[column])
            for column in ranked_columns
        }
        time_cell = cells['Time'] if show_time else "-"
        # Method labels come from folder names and may contain underscores.
        method_cell = str(method).replace("_", r"\_")
        lr = row['lr']

        rows.append(
            f"& {method_cell} & {lr} & {cells['$f1_{test}$']} & {cells['$acc_{test}$']}"
            f" & {cells['$f1_{forget}$']} & {cells['$acc_{forget}$']}"
            f" & {cells['MIA']} & {cells['GUM']} & {time_cell} \\\\"
        )

    # Define the LaTeX table footer
    footer = r"""\midrule\hline 

\end{tabular}}
\caption{Experimental results of different unlearning methods.}
\label{tab:unlearning_results}
\end{table*}"""

    latex_table = "\n".join([header] + rows + [footer])

    return latex_table

# Select one experiment folder and collect the directories directly inside it;
# the following cell reads the .txt reports found in each of them.
folder = "results/sm_right_test/de-DE/xlsr53-de"
subfolders = get_subfolders(folder)
subfolders  # bare expression: shown as the notebook cell output

In [None]:
# Aggregate the metrics of every sub-folder: each .txt report is parsed, the
# values of every metric are collected across reports and then averaged
# (rounded to 3 decimals). Dict-valued metrics are flattened to "<key>_<subkey>".
res = {}

for f in subfolders:
    texts = read_txt_files(f)
    if not texts:
        # No reports in this sub-folder: leave it out of the table.
        continue
    name = f.split("/")[-1]
    collected = res[name] = {}
    for t in texts:
        for key, value in parse_metrics(t).items():
            if not isinstance(value, dict):
                collected.setdefault(key, []).append(value)
                continue
            for k, v in value.items():
                collected.setdefault(f"{key}_{k}", []).append(v)
    # Replace each list of observations by its rounded mean.
    for key in collected:
        collected[key] = round(np.mean(collected[key]), 3)



# One row per sub-folder, one column per metric.
metric_columns = ['Test Acc_F1', 'Unseen Acc_F1', 'Forget acc_F1', 'Test Acc_Acc', 'Unseen Acc_Acc', 'Forget acc_Acc',  'MIA_MIA', 'MIA_Forgeting Score', 'Unlearning Time']
df = pd.DataFrame(res).T.drop(columns=['Final Score'])
df = df[metric_columns].sort_index()

def format_number(x):
    """Format floats with three decimals and a decimal comma.

    Floats become strings such as ``"0,123"``; any other value (ints,
    strings, ``None``, ...) is returned unchanged.
    """
    if not isinstance(x, float):
        return x
    return format(x, ".3f").replace('.', ',')

# Format every float cell with a decimal comma. DataFrame.applymap was renamed
# to DataFrame.map in pandas 2.1 (applymap is deprecated), so use whichever
# method this pandas version provides. The check is done on the class because
# attribute access on an instance could resolve to a column named "map".
_format_cells = df.map if hasattr(pd.DataFrame, "map") else df.applymap
df = _format_cells(format_number)
df

### Ranking 1

In [None]:
from math import log

def GUM(f1_unl, f1_gold, mia_unl, mia_gold, mia_full, time_unl, time_gold, alfa=1, beta=1):
    """Combine three terms (E, U, T) into a single weighted score.

    The terms are computed from the arguments as follows:

    * ``E = 1 - ((mia_unl - mia_gold) / (max(mia_full, mia_unl) - mia_gold))**2``
      after clamping ``mia_unl`` to at most ``mia_full`` and ``mia_gold`` to
      at most the midpoint of ``mia_unl`` and ``mia_full``;
    * ``U = 1 - |f1_unl - f1_gold|``;
    * ``T = 1 - log(time_unl + 1) / log(time_gold + 1)``.

    The result is ``(U*E*T) * (alfa + beta + 1)`` divided by
    ``alfa*E*T + beta*U*T + U*E + 1e-6``; the ``1e-6`` keeps the final
    division defined when all products are zero.

    NOTE(review): the parameter names suggest the ``unl``/``gold``/``full``
    values belong to the unlearned, retrained and original models — confirm.
    Plain Python floats raise ``ZeroDivisionError`` when ``time_gold`` is 0 or
    when the clamped ``mia_gold`` equals ``mia_full``.
    """
    # Clamp the MIA inputs before measuring the gap to the gold value.
    mia_unl = min(mia_unl, mia_full)
    mia_gold = min((mia_unl + mia_full) / 2, mia_gold)

    mia_gap_ratio = (mia_unl - mia_gold) / (max(mia_full, mia_unl) - mia_gold)
    efficacy = 1 - mia_gap_ratio ** 2
    utility = 1 - abs(f1_unl - f1_gold)
    log_time_ratio = (log(time_unl + 1)) / (log(time_gold + 1))
    time_term = 1 - log_time_ratio

    product = utility * efficacy * time_term
    weight_sum = alfa + beta + 1
    pairwise_sum = alfa * efficacy * time_term + beta * utility * time_term + utility * efficacy + 1e-6

    return (product * weight_sum) / pairwise_sum

# Experiment folders to rank; the model label used below is built from the
# second and the last path component of each entry.
folders = [
    "results/fsc/cnn2d.pth",
    "results/slurp/w2v2-base-60ep-spkr",
    "results/italic/hard_speaker/xlsr53-it",
    "results/italic/hard_speaker/xlsr300",
    "results/sm_right_test/de-DE/xlsr53-de",
    "results/sm_right_test/de-DE/xlsr300-de",
    "results/sm_right_test/fr-FR/xlsr53-fr",
    "results/sm_right_test/fr-FR/xlsr300-fr",
]

# One frame of per-method best results per folder; concatenated once at the
# end instead of repeatedly concatenating onto an empty DataFrame.
best_frames = []

for folder in folders:
    model_name = folder.split("/")[1] + "_" + folder.split("/")[-1]
    subfolders = get_subfolders(folder)

    # Average every metric over the .txt reports of each sub-folder.
    # Dict-valued metrics are flattened to "<key>_<subkey>".
    res = {}
    for f in subfolders:
        texts = read_txt_files(f)
        name = os.path.basename(f)
        if not texts:
            continue
        res[name] = {}
        for t in texts:
            for key, value in parse_metrics(t).items():
                if isinstance(value, dict):
                    for k, v in value.items():
                        res[name].setdefault(f"{key}_{k}", []).append(v)
                else:
                    res[name].setdefault(key, []).append(value)
        for key, value in res[name].items():
            res[name][key] = round(np.mean(value), 3)

    df = pd.DataFrame(res).T
    df = df.drop(columns=['Final Score'])
    df = df[['Test Acc_F1', 'Unseen Acc_F1', 'Forget acc_F1', 'Test Acc_Acc', 'Unseen Acc_Acc', 'Forget acc_Acc',  'MIA_MIA', 'MIA_Forgeting Score', 'Unlearning Time']]
    df = df.sort_index()

    # The learning rate is the last "_"-separated token of the run name;
    # the full/gold reference runs get 0.
    df['LR'] = [
        0 if ("full" in elem or "gold" in elem) else float(elem.split("_")[-1])
        for elem in df.index
    ]

    # Strip the learning-rate suffix so runs of one method share a label.
    df.index = [elem.rsplit("_", 1)[0] for elem in df.index]

    # GUM needs the gold and full reference values next to each run's own.
    # NOTE(review): the previous call passed only two arguments
    # (GUM(f1, forget_score)), which cannot work with GUM's seven required
    # parameters. The columns below keep the ones that call used
    # ('Unseen Acc_Acc' and 'MIA_Forgeting Score') and add 'Unlearning Time';
    # confirm these are the intended inputs. Note that 'Unseen Acc_Acc' feeds
    # the f1_* parameters here.
    gold_row = df.loc[["gold"]].iloc[0]
    full_row = df.loc[["full"]].iloc[0]
    df['GUM'] = [
        GUM(
            f1_unl=row['Unseen Acc_Acc'],
            f1_gold=gold_row['Unseen Acc_Acc'],
            mia_unl=row['MIA_Forgeting Score'],
            mia_gold=gold_row['MIA_Forgeting Score'],
            mia_full=full_row['MIA_Forgeting Score'],
            time_unl=row['Unlearning Time'],
            time_gold=gold_row['Unlearning Time'],
        )
        for _, row in df.iterrows()
    ]

    # Keep, for every method, the learning rate with the highest GUM.
    best_results = {}
    for method, row in df.iterrows():
        current = best_results.get(method)
        if current is None:
            best_results[method] = {
                'GUM': row['GUM'],
                'LR': row['LR'],
                'model': model_name,
            }
        elif row['GUM'] > current['GUM']:
            current['GUM'] = row['GUM']
            # Stored as a number in both branches (it used to become a str here).
            current['LR'] = row['LR']

    best_frames.append(pd.DataFrame(best_results).T)

full_df = pd.concat(best_frames)

# Rows: methods, columns: models, cells: best learning rate.
pivot_df = full_df.pivot_table(index=full_df.index, columns='model', values='LR', aggfunc='first')
pivot_df = pivot_df.drop(index=['full', 'gold'])
pivot_df

In [None]:
def format_lr(lr):
    """Format a learning rate in scientific notation; 0 is shown as "full"."""
    # NOTE(review): both the full and the gold reference rows carry LR 0, so
    # the gold row is labelled "full" as well — confirm this is intended.
    if lr == 0:
        return "full"
    return f"{lr:.0e}"


# For every experiment folder: rebuild the metrics table, keep only the run
# with the best learning rate per method (taken from pivot_df) plus the
# full/gold references, and print it as a LaTeX table.
for folder in folders:
    model_name = folder.split("/")[1] + "_" + folder.split("/")[-1]
    subfolders = get_subfolders(folder)

    # Average every metric over the .txt reports of each sub-folder.
    # Dict-valued metrics are flattened to "<key>_<subkey>".
    res = {}
    for f in subfolders:
        texts = read_txt_files(f)
        name = os.path.basename(f)
        if not texts:
            continue
        res[name] = {}
        for t in texts:
            for key, value in parse_metrics(t).items():
                if isinstance(value, dict):
                    for k, v in value.items():
                        res[name].setdefault(f"{key}_{k}", []).append(v)
                else:
                    res[name].setdefault(key, []).append(value)
        for key, value in res[name].items():
            res[name][key] = round(np.mean(value), 3)

    df = pd.DataFrame(res).T
    df = df.drop(columns=['Final Score'])
    df = df[['Test Acc_F1', 'Unseen Acc_F1', 'Forget acc_F1', 'Test Acc_Acc', 'Unseen Acc_Acc', 'Forget acc_Acc',  'MIA_MIA', 'MIA_Forgeting Score', 'Unlearning Time']]
    df = df.sort_index()

    # The learning rate is the last "_"-separated token of the run name;
    # the full/gold reference runs get 0.
    df['LR'] = [
        0 if ("full" in elem or "gold" in elem) else float(elem.split("_")[-1])
        for elem in df.index
    ]

    # Strip the learning-rate suffix so runs of one method share a label.
    df.index = [elem.rsplit("_", 1)[0] for elem in df.index]

    # GUM needs the gold and full reference values next to each run's own.
    # NOTE(review): the previous call passed only two arguments
    # (GUM(f1, forget_score)), which cannot work with GUM's seven required
    # parameters. The columns below keep the ones that call used
    # ('Unseen Acc_F1' and 'MIA_Forgeting Score') and add 'Unlearning Time';
    # confirm these are the intended inputs.
    gold_row = df.loc[["gold"]].iloc[0]
    full_row = df.loc[["full"]].iloc[0]
    df['GUM'] = [
        GUM(
            f1_unl=row['Unseen Acc_F1'],
            f1_gold=gold_row['Unseen Acc_F1'],
            mia_unl=row['MIA_Forgeting Score'],
            mia_gold=gold_row['MIA_Forgeting Score'],
            mia_full=full_row['MIA_Forgeting Score'],
            time_unl=row['Unlearning Time'],
            time_gold=gold_row['Unlearning Time'],
        )
        for _, row in df.iterrows()
    ]

    # Keep the reference rows and, per method, the run whose learning rate
    # matches the one selected in pivot_df for this model.
    indices_to_keep = []
    for position, (method, row) in enumerate(df.iterrows()):
        if method in ("full", "gold"):
            indices_to_keep.append(position)
        elif float(row['LR']) == float(pivot_df.loc[method, model_name]):
            indices_to_keep.append(position)

    df = df.iloc[indices_to_keep]

    df = df.drop(columns=['Test Acc_F1', 'Test Acc_Acc', 'MIA_MIA'])
    columns_order = ["LR", "Unseen Acc_F1", "Unseen Acc_Acc", "Forget acc_F1", "Forget acc_Acc", "MIA_Forgeting Score", "GUM","Unlearning Time"]
    # Explicit copy: the column assignment below must not target a slice.
    df = df[columns_order].copy()

    df['LR'] = df['LR'].apply(format_lr)

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; use
    # whichever method this pandas version provides.
    format_cells = df.map if hasattr(pd.DataFrame, "map") else df.applymap
    df = format_cells(format_number)

    columns_name = ["lr", "$f1_{test}$", "$acc_{test}$", "$f1_{forget}$", "$acc_{forget}$", "MIA", "GUM", "Time"]
    df.columns = columns_name

    # NOTE: the cells are already formatted strings here, so the best /
    # second-best ranking inside df_to_latex_table operates on strings.
    latex_code = df_to_latex_table(df)

    print(model_name)
    print(latex_code)
    print("\n")