In [None]:
import os
# Configure CUDA through environment variables: order devices by PCI bus ID
# and expose only device "0" to this process.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    CUDA_VISIBLE_DEVICES="0",
)

# Forget01

In [None]:
import os
import json
import re

# Keys looked up in every TOFU_SUMMARY.json by the collector functions below.
metric_keys = [
    'extraction_strength',
    'extraction_strength_forget_para',
    'extraction_strength_forget_para_pqa',
    'extraction_strength_forget_para_pqpa',
    'extraction_strength_ra',
    'extraction_strength_retain',
    'extraction_strength_retain_para',
    'extraction_strength_retain_para_pqa',
    'extraction_strength_retain_para_pqpa',
    'extraction_strength_wf',
]

def get_all_checkpoints_eval_paths(task_dir):
    """Locate the evaluation summary of every checkpoint stored under ``task_dir``.

    An entry of ``task_dir`` counts as a checkpoint only when its name is
    exactly ``checkpoint-<digits>``. Names that merely start with that prefix
    (for example ``checkpoint-100-old``) are ignored so they cannot be
    mistaken for checkpoint 100.

    Args:
        task_dir: Path of the directory that holds the ``checkpoint-*`` entries.

    Returns:
        A list of ``(checkpoint_number, summary_path)`` tuples sorted by
        checkpoint number, where ``summary_path`` is
        ``<task_dir>/checkpoint-<n>/evals/TOFU_SUMMARY.json``. The list is empty
        when ``task_dir`` is not an existing directory or when no checkpoint has
        a summary file.
    """
    # isdir rather than exists: a plain file at this path must yield an empty
    # result instead of making os.listdir raise NotADirectoryError.
    if not os.path.isdir(task_dir):
        return []

    eval_paths = []
    for name in os.listdir(task_dir):
        match = re.fullmatch(r"checkpoint-(\d+)", name)
        if match is None:
            continue
        eval_file = os.path.join(task_dir, name, "evals", "TOFU_SUMMARY.json")
        # Only checkpoints that were actually evaluated are reported.
        if os.path.isfile(eval_file):
            eval_paths.append((int(match.group(1)), eval_file))

    return sorted(eval_paths, key=lambda item: item[0])


def epoch_alpha_beta(base_dir, models, trainers_experiments, epochs, alphas, betas, splits, metric_keys,
                     expected_checkpoints=4):
    """Collect per-checkpoint metrics for runs swept over epoch, alpha and beta.

    For every combination of model, trainer, epoch, alpha, beta and split, the
    run directory
    ``<base_dir>/tofu_<model>/<split>/<loss_type>/tofu_<model>_<split>_<trainer>_epoch<epoch>_alpha<alpha>_beta<beta>``
    is scanned with ``get_all_checkpoints_eval_paths`` and the requested keys
    are read from each checkpoint's summary JSON.

    Args:
        base_dir: Root directory that contains the ``tofu_<model>`` folders.
        models: Iterable of model names.
        trainers_experiments: Iterable of strings made of exactly two
            whitespace-separated tokens, ``"<loss_type> <trainer>"``.
        epochs, alphas, betas, splits: Iterables of values interpolated into
            the run directory name.
        metric_keys: Keys to read from every summary JSON.
        expected_checkpoints: Number of evaluated checkpoints a run must have
            to be used. Defaults to 4. Pass ``None`` to accept any run with at
            least one evaluated checkpoint.

    Returns:
        ``(metrics_all, combi_list)``. ``metrics_all`` maps each metric key to a
        list of values and ``combi_list`` holds one configuration dict per
        entry; the lists are index-aligned. A run that is missing or has an
        unexpected number of checkpoints contributes a single placeholder entry
        whose ``"checkpoint"`` and metric values are all ``None``. A summary
        that cannot be read contributes ``None`` metrics for that checkpoint.
    """
    metrics_all = {key: [] for key in metric_keys}
    combi_list = []

    def _record(combo, values):
        # Append one aligned entry to every metric list and to combi_list.
        for key, value in zip(metric_keys, values):
            metrics_all[key].append(value)
        combi_list.append(combo)

    for model in models:
        for trainer in trainers_experiments:
            train_item_loss_type, train_item = trainer.split()
            for epoch in epochs:
                for alpha in alphas:
                    for beta in betas:
                        for split in splits:
                            task_name = f"tofu_{model}/{split}/{train_item_loss_type}/tofu_{model}_{split}_{train_item}_epoch{epoch}_alpha{alpha}_beta{beta}"
                            task_dir = os.path.join(base_dir, task_name)
                            combo = {
                                "model": model,
                                "trainer": trainer,
                                "epoch": epoch,
                                "alpha": alpha,
                                "beta": beta,
                                "split": split,
                            }

                            eval_paths = get_all_checkpoints_eval_paths(task_dir)
                            complete = bool(eval_paths) and (
                                expected_checkpoints is None or len(eval_paths) == expected_checkpoints
                            )
                            if not complete:
                                # Report the run and keep a placeholder so the grid stays visible.
                                print(f"{task_dir} {len(eval_paths)}")
                                _record({**combo, "checkpoint": None}, [None for _ in metric_keys])
                                continue

                            for ckpt_num, json_path in eval_paths:
                                try:
                                    with open(json_path, 'r', encoding="utf-8") as f:
                                        data = json.load(f)
                                    # Build the whole row first so a failure can never leave
                                    # the metric lists with different lengths.
                                    values = []
                                    for key in metric_keys:
                                        value = data.get(key, None)
                                        if value is None:
                                            print(f"{key}: {json_path}")
                                        values.append(value)
                                except Exception as e:
                                    # Best effort: an unreadable summary becomes a row of None.
                                    print(f"{json_path}: {e}")
                                    values = [None for _ in metric_keys]

                                _record({**combo, "checkpoint": ckpt_num}, values)

    return metrics_all, combi_list


def epoch_alpha(base_dir, models, trainers_experiments, epochs, alphas, splits, metric_keys,
                expected_checkpoints=4):
    """Collect per-checkpoint metrics for runs swept over epoch and alpha.

    For every combination of model, trainer, epoch, alpha and split, the run
    directory
    ``<base_dir>/tofu_<model>/<split>/<loss_type>/tofu_<model>_<split>_<trainer>_epoch<epoch>_alpha<alpha>``
    is scanned with ``get_all_checkpoints_eval_paths`` and the requested keys
    are read from each checkpoint's summary JSON.

    Args:
        base_dir: Root directory that contains the ``tofu_<model>`` folders.
        models: Iterable of model names.
        trainers_experiments: Iterable of strings made of exactly two
            whitespace-separated tokens, ``"<loss_type> <trainer>"``.
        epochs, alphas, splits: Iterables of values interpolated into the run
            directory name.
        metric_keys: Keys to read from every summary JSON.
        expected_checkpoints: Number of evaluated checkpoints a run must have
            to be used. Defaults to 4. Pass ``None`` to accept any run with at
            least one evaluated checkpoint.

    Returns:
        ``(metrics_all, combi_list)``. ``metrics_all`` maps each metric key to a
        list of values and ``combi_list`` holds one configuration dict per
        entry; the lists are index-aligned. A run that is missing or has an
        unexpected number of checkpoints contributes a single placeholder entry
        whose ``"checkpoint"`` and metric values are all ``None``. A summary
        that cannot be read contributes ``None`` metrics for that checkpoint.
    """
    metrics_all = {key: [] for key in metric_keys}
    combi_list = []

    def _record(combo, values):
        # Append one aligned entry to every metric list and to combi_list.
        for key, value in zip(metric_keys, values):
            metrics_all[key].append(value)
        combi_list.append(combo)

    for model in models:
        for trainer in trainers_experiments:
            train_item_loss_type, train_item = trainer.split()
            for epoch in epochs:
                for alpha in alphas:
                    for split in splits:
                        task_name = f"tofu_{model}/{split}/{train_item_loss_type}/tofu_{model}_{split}_{train_item}_epoch{epoch}_alpha{alpha}"
                        task_dir = os.path.join(base_dir, task_name)
                        combo = {
                            "model": model,
                            "trainer": trainer,
                            "epoch": epoch,
                            "alpha": alpha,
                            "split": split,
                        }

                        eval_paths = get_all_checkpoints_eval_paths(task_dir)
                        complete = bool(eval_paths) and (
                            expected_checkpoints is None or len(eval_paths) == expected_checkpoints
                        )
                        if not complete:
                            # Report the run and keep a placeholder so the grid stays visible.
                            print(f"{task_dir} {len(eval_paths)}")
                            _record({**combo, "checkpoint": None}, [None for _ in metric_keys])
                            continue

                        for ckpt_num, json_path in eval_paths:
                            try:
                                with open(json_path, 'r', encoding="utf-8") as f:
                                    data = json.load(f)
                                # Build the whole row first so a failure can never leave
                                # the metric lists with different lengths.
                                values = []
                                for key in metric_keys:
                                    value = data.get(key, None)
                                    if value is None:
                                        print(f"{key}: {json_path}")
                                    values.append(value)
                            except Exception as e:
                                # Best effort: an unreadable summary becomes a row of None.
                                print(f" {json_path}: {e}")
                                values = [None for _ in metric_keys]

                            _record({**combo, "checkpoint": ckpt_num}, values)

    return metrics_all, combi_list


def epoch(base_dir, models, trainers_experiments, epochs, splits, metric_keys,
          expected_checkpoints=4):
    """Collect per-checkpoint metrics for runs swept over epoch only.

    For every combination of model, trainer, epoch and split, the run directory
    ``<base_dir>/tofu_<model>/<split>/<loss_type>/tofu_<model>_<split>_<trainer>_epoch<epoch>``
    is scanned with ``get_all_checkpoints_eval_paths`` and the requested keys
    are read from each checkpoint's summary JSON.

    Args:
        base_dir: Root directory that contains the ``tofu_<model>`` folders.
        models: Iterable of model names.
        trainers_experiments: Iterable of strings made of exactly two
            whitespace-separated tokens, ``"<loss_type> <trainer>"``.
        epochs, splits: Iterables of values interpolated into the run
            directory name.
        metric_keys: Keys to read from every summary JSON.
        expected_checkpoints: Number of evaluated checkpoints a run must have
            to be used. Defaults to 4. Pass ``None`` to accept any run with at
            least one evaluated checkpoint.

    Returns:
        ``(metrics_all, combi_list)``. ``metrics_all`` maps each metric key to a
        list of values and ``combi_list`` holds one configuration dict per
        entry; the lists are index-aligned. A run that is missing or has an
        unexpected number of checkpoints contributes a single placeholder entry
        whose ``"checkpoint"`` and metric values are all ``None``. A summary
        that cannot be read contributes ``None`` metrics for that checkpoint.
    """
    metrics_all = {key: [] for key in metric_keys}
    combi_list = []

    def _record(combo, values):
        # Append one aligned entry to every metric list and to combi_list.
        for key, value in zip(metric_keys, values):
            metrics_all[key].append(value)
        combi_list.append(combo)

    for model in models:
        for trainer in trainers_experiments:
            train_item_loss_type, train_item = trainer.split()
            # Named epoch_value so the loop variable does not reuse this function's name.
            for epoch_value in epochs:
                for split in splits:
                    task_name = f"tofu_{model}/{split}/{train_item_loss_type}/tofu_{model}_{split}_{train_item}_epoch{epoch_value}"
                    task_dir = os.path.join(base_dir, task_name)
                    combo = {
                        "model": model,
                        "trainer": trainer,
                        "epoch": epoch_value,
                        "split": split,
                    }

                    eval_paths = get_all_checkpoints_eval_paths(task_dir)
                    complete = bool(eval_paths) and (
                        expected_checkpoints is None or len(eval_paths) == expected_checkpoints
                    )
                    if not complete:
                        # Report the run and keep a placeholder so the grid stays visible.
                        print(f"{task_dir} {len(eval_paths)}")
                        _record({**combo, "checkpoint": None}, [None for _ in metric_keys])
                        continue

                    for ckpt_num, json_path in eval_paths:
                        try:
                            with open(json_path, 'r', encoding="utf-8") as f:
                                data = json.load(f)
                            # Build the whole row first so a failure can never leave
                            # the metric lists with different lengths.
                            values = []
                            for key in metric_keys:
                                value = data.get(key, None)
                                if value is None:
                                    print(f"{key}: {json_path}")
                                values.append(value)
                        except Exception as e:
                            # Best effort: an unreadable summary becomes a row of None.
                            print(f"{json_path}: {e}")
                            values = [None for _ in metric_keys]

                        _record({**combo, "checkpoint": ckpt_num}, values)

    return metrics_all, combi_list


In [None]:
# Sweep configuration shared by all methods.
base_dir = 'yourpath/saves/unlearn'
models = ["Llama-3.1-8B-Instruct"]
splits = ["forget01"]  # Can change to forget05, forget10

epochs = ["4"]
alphas = ["1", "2", "10", "20"]
betas = ["0.1", "1", "10"]

# Keyword arguments that are identical for every collector call below.
common_args = dict(
    base_dir=base_dir,
    models=models,
    epochs=epochs,
    splits=splits,
    metric_keys=metric_keys,
)

# DPO / NPO variants are swept over both alpha and beta.
# NOTE: the *_gdpr_combo spellings are kept because other cells use these names.
trainers_experiments = ["DPO_GDR DPO"]
dpo_gdr_result, dpo_gdpr_combo = epoch_alpha_beta(
    trainers_experiments=trainers_experiments, alphas=alphas, betas=betas, **common_args
)

trainers_experiments = ["DPO_KL DPO"]
dpo_kl_result, dpo_kl_combo = epoch_alpha_beta(
    trainers_experiments=trainers_experiments, alphas=alphas, betas=betas, **common_args
)

trainers_experiments = ["NPO_GDR NPO"]
npo_gdr_result, npo_gdpr_combo = epoch_alpha_beta(
    trainers_experiments=trainers_experiments, alphas=alphas, betas=betas, **common_args
)

trainers_experiments = ["NPO_KL NPO"]
npo_kl_result, npo_kl_combo = epoch_alpha_beta(
    trainers_experiments=trainers_experiments, alphas=alphas, betas=betas, **common_args
)

# Gradient ascent has no alpha or beta in its run directory name.
trainers_experiments = ["GradAscent GradAscent"]
ga_result, ga_combo = epoch(trainers_experiments=trainers_experiments, **common_args)

# GradDiff variants are swept over alpha only.
trainers_experiments = ["GradDiff_GDR GradDiff"]
gd_gdr_result, gd_gdr_combo = epoch_alpha(
    trainers_experiments=trainers_experiments, alphas=alphas, **common_args
)

trainers_experiments = ["GradDiff_KL GradDiff"]
gd_kl_result, gd_kl_combo = epoch_alpha(
    trainers_experiments=trainers_experiments, alphas=alphas, **common_args
)

In [None]:
# Summary files of the two reference runs (placeholder paths; adapt to your setup).
retain_path = "yourpath/saves/finetune/tofu_Llama-3.1-8B-Instruct_retain99/evals/TOFU_SUMMARY.json"  # Can change to retain95, retain90
full_path = "yourpath/saves/finetune/tofu_Llama-3.1-8B-Instruct_full/evals_forget01/TOFU_SUMMARY.json"  # Can change to forget05, forget10

# Read as UTF-8 explicitly so decoding does not depend on the platform's
# locale default encoding.
with open(retain_path, "r", encoding="utf-8") as f:
    retain_metrics = json.load(f)
with open(full_path, "r", encoding="utf-8") as f:
    full_metrics = json.load(f)

In [None]:
# Utility floor: a configuration qualifies only if it keeps at least 95% of the
# full model's extraction_strength_retain.
full_model_utility = full_metrics['extraction_strength_retain']
threshold = 0.95 * full_model_utility
print(
    f"[INFO] Full of extraction_strength_retain: {full_model_utility:.6f}",
    f"[INFO] Threshold for extraction_strength_retain (95% of full): {threshold:.6f}",
    "",
    sep="\n",
)

def find_best_result(metrics_dict, combi_list, threshold, name,
                     forget_key="extraction_strength", retain_key="extraction_strength_retain"):
    """Pick the configuration with the lowest forget metric that keeps enough utility.

    Entries are read in parallel from ``combi_list``, ``metrics_dict[forget_key]``
    and ``metrics_dict[retain_key]``. An entry qualifies when both metric values
    are present and its retain value is at least ``threshold``. Among qualifying
    entries the one with the smallest forget value wins; on an exact tie the
    later entry wins.

    If nothing qualifies, a warning is printed and the entry with the largest
    retain value is returned instead (its forget value may then be ``None``).

    Args:
        metrics_dict: Mapping from metric key to a list of values.
        combi_list: Configuration dicts, index-aligned with the metric lists.
        threshold: Minimum acceptable retain value.
        name: Label used in the warning message.
        forget_key: Key of the metric to minimise.
        retain_key: Key of the metric compared against ``threshold``.

    Returns:
        ``(best_combo, best_forget, best_retain)``; ``({}, None, None)`` when no
        entry has a retain value at all.

    Raises:
        KeyError: If ``forget_key`` or ``retain_key`` is missing from ``metrics_dict``.
    """
    forget_values = metrics_dict[forget_key]
    retain_values = metrics_dict[retain_key]

    best_alpha = None
    best_fq = float('inf')
    best_mu = None

    for combo, fq, mu in zip(combi_list, forget_values, retain_values):
        qualifies = mu is not None and fq is not None and mu >= threshold
        # "<=" on purpose: on ties the later entry replaces the earlier one.
        if qualifies and fq <= best_fq:
            best_alpha, best_fq, best_mu = combo, fq, mu

    if best_alpha is not None:
        return best_alpha, best_fq, best_mu

    print(f"[WARN] {name} No alpha satisfying the threshold. Using fallback: max {retain_key}.")
    max_idx = None
    max_mu = -float('inf')
    for idx, mu in enumerate(retain_values):
        if mu is not None and mu > max_mu:
            max_mu = mu
            max_idx = idx

    if max_idx is None:
        # No usable retain value anywhere: return an empty configuration.
        return {}, None, None

    return combi_list[max_idx], forget_values[max_idx], retain_values[max_idx]


# Select the best configuration per method under the current threshold.
# The last argument is only a label for the warning message; the DPO-GDR label
# previously read 'dpo_gdpr', a typo that made the warning name a method that
# does not exist.
# NOTE: dpo_gdpr_combo / npo_gdpr_combo are spelled that way where they are
# created, so those variable names are kept here.
alpha_dpo_gdr, fq_dpo_gdr, mu_dpo_gdr = find_best_result(dpo_gdr_result, dpo_gdpr_combo, threshold, 'dpo_gdr')
alpha_dpo_kl, fq_dpo_kl, mu_dpo_kl = find_best_result(dpo_kl_result, dpo_kl_combo, threshold, 'dpo_kl')
alpha_npo_gdr, fq_npo_gdr, mu_npo_gdr = find_best_result(npo_gdr_result, npo_gdpr_combo, threshold, 'npo_gdr')
alpha_npo_kl, fq_npo_kl, mu_npo_kl = find_best_result(npo_kl_result, npo_kl_combo, threshold, 'npo_kl')
alpha_ga, fq_ga, mu_ga = find_best_result(ga_result, ga_combo, threshold, 'ga')
alpha_gd_gdr, fq_gd_gdr, mu_gd_gdr = find_best_result(gd_gdr_result, gd_gdr_combo, threshold, 'gd_gdr')
alpha_gd_kl, fq_gd_kl, mu_gd_kl = find_best_result(gd_kl_result, gd_kl_combo, threshold, 'gd_kl')


# Each alpha_* is the whole configuration dict of the selected entry, so
# .values() prints every field of it, not just alpha.
print(f"- DPO-GDR     : forget = {fq_dpo_gdr}, retain = {mu_dpo_gdr},  alpha = {alpha_dpo_gdr.values()}")
print(f"- DPO-KL      : forget = {fq_dpo_kl}, retain = {mu_dpo_kl},  alpha = {alpha_dpo_kl.values()}")
print(f"- NPO-GDR     : forget = {fq_npo_gdr}, retain = {mu_npo_gdr},  alpha = {alpha_npo_gdr.values()}")
print(f"- NPO-KL      : forget = {fq_npo_kl}, retain = {mu_npo_kl},  alpha = {alpha_npo_kl.values()}")
print(f"- GA          : forget = {fq_ga}, retain = {mu_ga},  alpha = {alpha_ga.values()}")
print(f"- GD-GDR      : forget = {fq_gd_gdr}, retain = {mu_gd_gdr},  alpha = {alpha_gd_gdr.values()}")
print(f"- GD-KL       : forget = {fq_gd_kl}, retain = {mu_gd_kl},  alpha = {alpha_gd_kl.values()}")


In [None]:
# Utility floor: a configuration qualifies only if it keeps at least 90% of the
# full model's extraction_strength_retain.
full_model_utility = full_metrics['extraction_strength_retain']
threshold = 0.90 * full_model_utility
print(
    f"[INFO] Full of extraction_strength_retain: {full_model_utility:.6f}",
    f"[INFO] Threshold for extraction_strength_retain (90% of full): {threshold:.6f}",
    "",
    sep="\n",
)

# NOTE(review): this re-defines find_best_result, which the 95% cell above
# already defines; consider keeping a single definition.
def find_best_result(metrics_dict, combi_list, threshold, name,
                     forget_key="extraction_strength", retain_key="extraction_strength_retain"):
    """Pick the configuration with the lowest forget metric that keeps enough utility.

    Entries are read in parallel from ``combi_list``, ``metrics_dict[forget_key]``
    and ``metrics_dict[retain_key]``. An entry qualifies when both metric values
    are present and its retain value is at least ``threshold``. Among qualifying
    entries the one with the smallest forget value wins; on an exact tie the
    later entry wins.

    If nothing qualifies, a warning is printed and the entry with the largest
    retain value is returned instead (its forget value may then be ``None``).

    Args:
        metrics_dict: Mapping from metric key to a list of values.
        combi_list: Configuration dicts, index-aligned with the metric lists.
        threshold: Minimum acceptable retain value.
        name: Label used in the warning message.
        forget_key: Key of the metric to minimise.
        retain_key: Key of the metric compared against ``threshold``.

    Returns:
        ``(best_combo, best_forget, best_retain)``; ``({}, None, None)`` when no
        entry has a retain value at all.

    Raises:
        KeyError: If ``forget_key`` or ``retain_key`` is missing from ``metrics_dict``.
    """
    forget_values = metrics_dict[forget_key]
    retain_values = metrics_dict[retain_key]

    best_alpha = None
    best_fq = float('inf')
    best_mu = None

    for combo, fq, mu in zip(combi_list, forget_values, retain_values):
        qualifies = mu is not None and fq is not None and mu >= threshold
        # "<=" on purpose: on ties the later entry replaces the earlier one.
        if qualifies and fq <= best_fq:
            best_alpha, best_fq, best_mu = combo, fq, mu

    if best_alpha is not None:
        return best_alpha, best_fq, best_mu

    print(f"[WARN] {name} No alpha satisfying the threshold. Using fallback: max {retain_key}.")
    max_idx = None
    max_mu = -float('inf')
    for idx, mu in enumerate(retain_values):
        if mu is not None and mu > max_mu:
            max_mu = mu
            max_idx = idx

    if max_idx is None:
        # No usable retain value anywhere: return an empty configuration.
        return {}, None, None

    return combi_list[max_idx], forget_values[max_idx], retain_values[max_idx]

# Select the best configuration per method under the current threshold.
# The last argument is only a label for the warning message; the DPO-GDR label
# previously read 'dpo_gdpr', a typo that made the warning name a method that
# does not exist.
# NOTE: dpo_gdpr_combo / npo_gdpr_combo are spelled that way where they are
# created, so those variable names are kept here.
alpha_dpo_gdr, fq_dpo_gdr, mu_dpo_gdr = find_best_result(dpo_gdr_result, dpo_gdpr_combo, threshold, 'dpo_gdr')
alpha_dpo_kl, fq_dpo_kl, mu_dpo_kl = find_best_result(dpo_kl_result, dpo_kl_combo, threshold, 'dpo_kl')
alpha_npo_gdr, fq_npo_gdr, mu_npo_gdr = find_best_result(npo_gdr_result, npo_gdpr_combo, threshold, 'npo_gdr')
alpha_npo_kl, fq_npo_kl, mu_npo_kl = find_best_result(npo_kl_result, npo_kl_combo, threshold, 'npo_kl')
alpha_ga, fq_ga, mu_ga = find_best_result(ga_result, ga_combo, threshold, 'ga')
alpha_gd_gdr, fq_gd_gdr, mu_gd_gdr = find_best_result(gd_gdr_result, gd_gdr_combo, threshold, 'gd_gdr')
alpha_gd_kl, fq_gd_kl, mu_gd_kl = find_best_result(gd_kl_result, gd_kl_combo, threshold, 'gd_kl')

# Each alpha_* is the whole configuration dict of the selected entry, so
# .values() prints every field of it, not just alpha.
print(f"- DPO-GDR     : forget = {fq_dpo_gdr}, retain = {mu_dpo_gdr},  alpha = {alpha_dpo_gdr.values()}")
print(f"- DPO-KL      : forget = {fq_dpo_kl}, retain = {mu_dpo_kl},  alpha = {alpha_dpo_kl.values()}")
print(f"- NPO-GDR     : forget = {fq_npo_gdr}, retain = {mu_npo_gdr},  alpha = {alpha_npo_gdr.values()}")
print(f"- NPO-KL      : forget = {fq_npo_kl}, retain = {mu_npo_kl},  alpha = {alpha_npo_kl.values()}")
print(f"- GA          : forget = {fq_ga}, retain = {mu_ga},  alpha = {alpha_ga.values()}")
print(f"- GD-GDR      : forget = {fq_gd_gdr}, retain = {mu_gd_gdr},  alpha = {alpha_gd_gdr.values()}")
print(f"- GD-KL       : forget = {fq_gd_kl}, retain = {mu_gd_kl},  alpha = {alpha_gd_kl.values()}")


In [None]:
import math
import pandas as pd

# Configuration selected for each method by find_best_result (the values come
# from whichever threshold cell was executed last).
target_alphas = dict(
    GA=alpha_ga,
    GD_GDR=alpha_gd_gdr,
    GD_KL=alpha_gd_kl,
    DPO_GDR=alpha_dpo_gdr,
    DPO_KL=alpha_dpo_kl,
    NPO_GDR=alpha_npo_gdr,
    NPO_KL=alpha_npo_kl,
)

# Per-method (metrics, configurations) pairs produced by the collectors.
metrics_combo_dicts = dict(
    GA=(ga_result, ga_combo),
    GD_GDR=(gd_gdr_result, gd_gdr_combo),
    GD_KL=(gd_kl_result, gd_kl_combo),
    DPO_GDR=(dpo_gdr_result, dpo_gdpr_combo),
    DPO_KL=(dpo_kl_result, dpo_kl_combo),
    NPO_GDR=(npo_gdr_result, npo_gdpr_combo),
    NPO_KL=(npo_kl_result, npo_kl_combo),
)

# Columns of the final table.
# NOTE(review): this rebinds the metric_keys list defined near the top of the
# notebook; re-running the collection cell after this one would gather only
# these six keys.
metric_keys = [
    'extraction_strength_retain',
    'extraction_strength_retain_para_pqa',
    "extraction_strength_ra",
    "extraction_strength_wf",
    'extraction_strength',
    'extraction_strength_forget_para_pqa',
]


def get_all_metrics(metrics_dict, combi_list, target_alpha, metric_keys):
    """Return the metric values recorded for one configuration.

    Args:
        metrics_dict: Mapping from metric key to a list of values aligned with
            ``combi_list``.
        combi_list: List of configurations.
        target_alpha: The configuration to look up (compared by equality).
        metric_keys: Keys to extract.

    Returns:
        A dict with one entry per key in ``metric_keys``. Every value is
        ``None`` when ``target_alpha`` is not in ``combi_list``; a single value
        is ``None`` when its key is absent from ``metrics_dict`` or its list is
        too short to hold the configuration's position.
    """
    try:
        position = combi_list.index(target_alpha)
    except ValueError:
        # Unknown configuration: every requested metric is reported as None.
        return dict.fromkeys(metric_keys)

    picked = {}
    for key in metric_keys:
        series = metrics_dict.get(key, [None] * len(combi_list))
        picked[key] = series[position] if position < len(series) else None
    return picked


def extract_metrics_direct(metrics_dict, metric_keys):
    """Return ``{key: metrics_dict.get(key)}`` for every key in ``metric_keys``.

    Keys that are absent from ``metrics_dict`` map to ``None``.
    """
    return {key: metrics_dict.get(key, None) for key in metric_keys}

# The two reference rows come first: their summaries are plain key -> value
# dicts, so the values are read directly.
rows = [
    {"Method": "RETAIN", "Alpha": "N/A", **extract_metrics_direct(retain_metrics, metric_keys)},
    {"Method": "FULL", "Alpha": "N/A", **extract_metrics_direct(full_metrics, metric_keys)},
]

# One row per method, taken at the configuration selected for it. The "Alpha"
# column holds that whole configuration dict.
for method, alpha in target_alphas.items():
    metrics_dict, combi_list = metrics_combo_dicts[method]
    rows.append({
        "Method": method,
        "Alpha": alpha,
        **get_all_metrics(metrics_dict, combi_list, alpha, metric_keys),
    })

df = pd.DataFrame(rows)
print(df.to_string(index=False))
