In [11]:
import os
import glob
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

# Set plot style
plt.style.use('seaborn-v0_8-whitegrid')
sns.set(font_scale=1.2)
plt.rcParams['figure.figsize'] = (14, 8)

In [12]:
# Base directory for checkpoints
base_dir = "/u/kdkyum/ptmp_link/workdir/continual_unlearn/checkpoints"

# Function to find all available methods with continual_unlearn suffix
def find_methods():
    methods = []
    if os.path.exists(base_dir):
        for item in os.listdir(base_dir):
            if item.endswith("_continual_unlearn") and os.path.isdir(os.path.join(base_dir, item)):
                methods.append(item)
    return methods

# Get all methods
methods = find_methods()
print(f"Found methods: {methods}")

# Identify available datasets for each method
datasets = {}
for method in methods:
    method_dir = os.path.join(base_dir, method)
    datasets[method] = [d for d in os.listdir(method_dir) if os.path.isdir(os.path.join(method_dir, d))]
    print(f"Method {method} has datasets: {datasets[method]}")

Found methods: ['FT_continual_unlearn', 'synaptag_GA_continual_unlearn', 'retrain_continual_unlearn', 'NG_continual_unlearn', 'synaptag_continual_unlearn', 'synaptag_RL_continual_unlearn', 'GA_continual_unlearn', 'RL_continual_unlearn']
Method FT_continual_unlearn has datasets: ['cifar10', 'cifar100']
Method synaptag_GA_continual_unlearn has datasets: ['cifar10', 'cifar100']
Method retrain_continual_unlearn has datasets: ['cifar10', 'cifar100']
Method NG_continual_unlearn has datasets: ['cifar10', 'cifar100']
Method synaptag_continual_unlearn has datasets: ['cifar10', 'cifar100']
Method synaptag_RL_continual_unlearn has datasets: ['cifar10', 'cifar100']
Method GA_continual_unlearn has datasets: ['cifar10', 'cifar100']
Method RL_continual_unlearn has datasets: ['cifar10', 'cifar100']


In [13]:
def load_evaluation_results(method, dataset):
    """Load evaluation results for a specific method and dataset"""
    results = []
    method_dir = os.path.join(base_dir, method, dataset)
    
    if not os.path.exists(method_dir):
        print(f"Directory not found: {method_dir}")
        return results
    
    # Get all forget stages
    forget_stages = []
    for stage_dir in os.listdir(method_dir):
        stage_path = os.path.join(method_dir, stage_dir)
        if os.path.isdir(stage_path):
            try:
                # Handle both underscore and hyphen formats (e.g., '0_1' or '0-1')
                if '-' in stage_dir:
                    begin, end = map(int, stage_dir.split('-'))
                elif '_' in stage_dir:
                    begin, end = map(int, stage_dir.split('_'))
                else:
                    # Skip directories that don't follow either pattern
                    raise ValueError(f"Directory name format not recognized: {stage_dir}")
                    
                forget_stages.append((begin, end, stage_dir, stage_path))
            except Exception as e:
                print(f"Skipping directory with invalid format: {stage_dir} - {str(e)}")
    
    # Sort by end class for proper ordering
    forget_stages.sort(key=lambda x: x[1])
    
    # Load results for each stage
    for begin, end, stage_dir, stage_path in forget_stages:
        eval_file = os.path.join(stage_path, 'evaluation_results.json')
        if os.path.exists(eval_file):
            try:
                with open(eval_file, 'r') as f:
                    data = json.load(f)
                    results.append({
                        'method': method,
                        'dataset': dataset,
                        'forget_class_begin': begin,
                        'forget_class_end': end,
                        'data': data,
                    })
            except Exception as e:
                print(f"Error loading {eval_file}: {e}")
    
    return results

def extract_metrics(results):
    """Extract key metrics from loaded results into a structured DataFrame"""
    metrics_data = []
    
    for result in results:
        method = result['method']
        method_display = method.replace('_continual_unlearn', '')
        dataset = result['dataset']
        forget_begin = result['forget_class_begin']
        forget_end = result['forget_class_end']
        data = result['data']
        
        # Extract common metrics
        metrics = {
            'method': method_display,
            'dataset': dataset,
            'forget_class_begin': forget_begin,
            'forget_class_end': forget_end,
            'classes_forgotten': forget_end - forget_begin,
            'unlearning_time': data.get('unlearning_time', None)
        }
        
        # Extract accuracy metrics
        if 'accuracy' in data:
            if isinstance(data['accuracy'], dict):
                for key, value in data['accuracy'].items():
                    metrics[f'accuracy_{key}'] = value
            else:
                metrics['accuracy'] = data['accuracy']

        for x in data["class_wise_accuracy"]:
            metrics[f'accuracy_class_{x["class"]}'] = x.get('accuracy', None)
        
        # Extract MIA metrics
        if 'SVC_MIA_forget_efficacy' in data:
            for key, value in data['SVC_MIA_forget_efficacy'].items():
                metrics[f'mia_forget_{key}'] = value
                
        metrics_data.append(metrics)
    
    return pd.DataFrame(metrics_data)

In [None]:
# Load all results
all_results = []
for method in methods:
    # for dataset in datasets[method]:
    method_results = load_evaluation_results(method, "cifar10")
    all_results.extend(method_results)

# Convert to DataFrame for easier analysis
df = extract_metrics(all_results)

# Show basic stats
print(f"Loaded {len(df)} evaluation results")
print(f"Methods: {df['method'].unique()}")
print(f"Datasets: {df['dataset'].unique()}")

# Display the first few rows
df.head()

Skipping directory with invalid format: masks - Directory name format not recognized: masks
Loaded 144 evaluation results
Methods: ['FT' 'synaptag_GA' 'retrain' 'NG' 'synaptag' 'synaptag_RL' 'GA' 'RL']
Datasets: ['cifar100']


Unnamed: 0,method,dataset,forget_class_begin,forget_class_end,classes_forgotten,unlearning_time,accuracy_retain,accuracy_forget,accuracy_val,accuracy_test,...,accuracy_class_95,accuracy_class_96,accuracy_class_97,accuracy_class_98,accuracy_class_99,mia_forget_correctness,mia_forget_confidence,mia_forget_entropy,mia_forget_m_entropy,mia_forget_prob
0,FT,cifar100,0,4,4,151.605011,97.417544,2.533333,88.7,69.43,...,54.0,61.0,74.0,59.0,66.0,0.974667,0.999111,0.831556,1.0,0.855111
1,FT,cifar100,0,9,9,143.148822,97.338272,13.422222,85.24,64.68,...,73.0,60.0,72.0,78.0,63.0,0.865778,0.988889,0.848,1.0,0.954222
2,FT,cifar100,0,14,14,133.531575,98.392157,25.777778,82.74,63.67,...,55.0,72.0,70.0,75.0,72.0,0.742222,0.972,0.864889,1.0,0.907111
3,FT,cifar100,0,19,19,126.80769,96.138889,12.311111,75.26,57.5,...,81.0,61.0,71.0,65.0,70.0,0.876889,0.981333,0.908444,1.0,0.847111
4,FT,cifar100,0,24,24,121.599784,99.194074,32.977778,74.18,57.75,...,69.0,51.0,84.0,73.0,64.0,0.670222,0.989333,0.948889,1.0,0.876444


In [None]:
df.to_csv("eval_results_for_cifar10.csv", index=False)