In [1]:
import os
import json
import numpy as np
import pandas as pd

In [2]:
def summarize_json_to_df(json_folder, std_flag=False):
    """Aggregate per-file editing metrics from a folder of JSON results.

    Every file in ``json_folder`` whose name ends in ``.json`` is loaded. The
    name (with ``_results.json`` removed) is split at its first underscore
    into ``edit_method`` and ``model``. Each file must contain a list of
    records with ``pre`` and ``post`` entries providing ``rewrite_acc``,
    ``rephrase_acc`` and ``portability['subject_aliasing_acc']``; only the
    first element of each of those lists is used.

    Args:
        json_folder: Directory holding the result files.
        std_flag: If False (default), the ``*_pre`` / ``*_post`` columns hold
            the mean in percent rounded to two decimals. If True, they hold
            ``"mean±std"`` strings with both numbers expressed in percent.

    Returns:
        pandas.DataFrame with one row per JSON file and the columns
        ``edit_method``, ``model``, then for each metric its ``*_pre``,
        ``*_post`` and ``*_increase`` columns. The ``*_increase`` columns are
        always numeric (rounded post mean minus rounded pre mean), regardless
        of ``std_flag``. A folder without JSON files yields an empty frame
        that still carries all columns.
    """
    portability_type = 'subject_aliasing'
    known_methods = ["ROME", "FT-M", "ICL"]
    # (column prefix, name of the increase column, accessor for one side of a record)
    metric_specs = [
        ("Reliability", "Reli_increase",
         lambda side: side['rewrite_acc'][0]),
        ("Generalization", "Gene_increase",
         lambda side: side['rephrase_acc'][0]),
        ("Portability", "Port_increase",
         lambda side: side['portability'][f'{portability_type}_acc'][0]),
    ]
    columns = ['edit_method', 'model']
    for prefix, increase_col, _ in metric_specs:
        columns += [f"{prefix}_pre", f"{prefix}_post", increase_col]

    def calc_metric(metric_list, percent=100):
        """Return (display value, rounded numeric mean) for one metric."""
        mean_pct = np.mean(metric_list) * percent
        # Scale the std like the mean so both halves of "mean±std" share units.
        std_pct = np.std(metric_list) * percent
        rounded = np.round(mean_pct, 2)
        display = f"{mean_pct:.2f}±{std_pct:.2f}" if std_flag else rounded
        return display, rounded

    rows = []
    for filename in os.listdir(json_folder):
        if not filename.endswith('.json'):
            continue
        # Only the load needs the file handle; release it before processing.
        with open(os.path.join(json_folder, filename), 'r') as file:
            metrics = json.load(file)
        base_filename = filename.replace('_results.json', '')
        edit_method, model = base_filename.split('_', 1)

        row = {"edit_method": edit_method, "model": model}
        for prefix, increase_col, pick in metric_specs:
            pre_display, pre_value = calc_metric([pick(e['pre']) for e in metrics])
            post_display, post_value = calc_metric([pick(e['post']) for e in metrics])
            row[f"{prefix}_pre"] = pre_display
            row[f"{prefix}_post"] = post_display
            # Always derived from the numeric means, so std_flag=True no longer
            # attempts to subtract two formatted strings.
            row[increase_col] = post_value - pre_value
        rows.append(row)

    # Passing `columns` keeps the schema stable even when no file was found.
    df = pd.DataFrame(rows, columns=columns)
    # Known methods keep their fixed sort order; any other method is appended
    # instead of being silently turned into NaN by the categorical cast.
    extra_methods = sorted(set(df['edit_method']) - set(known_methods))
    df['edit_method'] = pd.Categorical(df['edit_method'], known_methods + extra_methods)
    return df


# Summary table for the commonsense result folder, ordered by edit method, then model.
summarize_json_to_df(
    "../results/results_commonsense_misinfomation_injection/"
).sort_values(by=['edit_method', 'model'])

Unnamed: 0,edit_method,model,Reliability_pre,Reliability_post,Reli_increase,Generalization_pre,Generalization_post,Gene_increase,Portability_pre,Portability_post,Port_increase
2,ROME,Meta-Llama-3-8B-Instruct,1.0,90.0,89.0,10.0,70.0,60.0,2.0,72.0,70.0
7,ROME,Mistral-7B-Instruct-v0.1,1.0,85.0,84.0,1.0,40.0,39.0,2.0,55.0,53.0
1,ROME,Mistral-7B-Instruct-v0.2,3.0,73.0,70.0,8.0,54.0,46.0,3.0,53.0,50.0
11,ROME,claude2-alpaca-7B,5.0,45.0,40.0,12.0,32.0,20.0,4.0,23.0,19.0
6,ROME,vicuna-7b-v1.5,2.0,75.0,73.0,4.0,47.0,43.0,2.0,49.0,47.0
3,FT-M,Meta-Llama-3-8B-Instruct,1.0,88.0,87.0,10.0,72.0,62.0,2.0,86.0,84.0
8,FT-M,Mistral-7B-Instruct-v0.1,1.0,29.0,28.0,1.0,15.0,14.0,2.0,23.0,21.0
10,FT-M,Mistral-7B-Instruct-v0.2,2.0,35.0,33.0,8.0,25.0,17.0,3.0,22.0,19.0
14,FT-M,claude2-alpaca-7B,5.0,78.0,73.0,11.0,62.0,51.0,4.0,59.0,55.0
5,FT-M,vicuna-7b-v1.5,2.0,71.0,69.0,4.0,49.0,45.0,2.0,53.0,51.0


In [3]:
# Summary table for the long-tail result folder, ordered by edit method, then model.
summarize_json_to_df(
    "../results/results_long_tail_misinfomation_injection/"
).sort_values(by=['edit_method', 'model'])

Unnamed: 0,edit_method,model,Reliability_pre,Reliability_post,Reli_increase,Generalization_pre,Generalization_post,Gene_increase,Portability_pre,Portability_post,Port_increase
2,ROME,Meta-Llama-3-8B-Instruct,2.0,52.0,50.0,0.0,47.0,47.0,2.0,29.0,27.0
7,ROME,Mistral-7B-Instruct-v0.1,1.0,83.0,82.0,0.0,43.0,43.0,1.0,17.0,16.0
1,ROME,Mistral-7B-Instruct-v0.2,0.0,58.0,58.0,0.0,49.0,49.0,1.0,13.0,12.0
11,ROME,claude2-alpaca-7B,0.0,53.0,53.0,0.0,38.0,38.0,2.0,6.0,4.0
6,ROME,vicuna-7b-v1.5,1.0,80.0,79.0,1.0,61.0,60.0,1.0,13.0,12.0
3,FT-M,Meta-Llama-3-8B-Instruct,2.0,67.0,65.0,0.0,62.0,62.0,2.0,62.0,60.0
8,FT-M,Mistral-7B-Instruct-v0.1,1.0,42.0,41.0,0.0,13.0,13.0,1.0,14.0,13.0
10,FT-M,Mistral-7B-Instruct-v0.2,0.0,16.0,16.0,0.0,7.0,7.0,1.0,9.0,8.0
14,FT-M,claude2-alpaca-7B,0.0,68.0,68.0,0.0,56.0,56.0,2.0,42.0,40.0
5,FT-M,vicuna-7b-v1.5,1.0,60.0,59.0,1.0,45.0,44.0,1.0,31.0,30.0


In [4]:
def summarize_bias_json_to_df(json_folder, std_flag=False):
    """Aggregate bias-injection editing metrics from a folder of JSON results.

    Every file in ``json_folder`` whose name ends in ``.json`` is loaded. The
    first three underscore-separated tokens of the file name are taken as
    ``bias_type``, ``edit_method`` and ``model``. Each file must contain a
    list of records with ``pre`` and ``post`` entries providing
    ``rewrite_acc`` and ``rephrase_acc``; only the first element of each of
    those lists is used.

    Args:
        json_folder: Directory holding the result files.
        std_flag: If False (default), the ``*_pre`` / ``*_post`` columns hold
            the mean in percent formatted with two decimals (as strings). If
            True, they hold ``"mean±std"`` strings, both in percent.

    Returns:
        pandas.DataFrame with one row per JSON file and the columns
        ``bias_type``, ``edit_method``, ``model``, then for each metric its
        ``*_pre``, ``*_post`` and ``*_increase`` columns. The ``*_increase``
        columns are always floats computed from the two-decimal means,
        regardless of ``std_flag``. A folder without JSON files yields an
        empty frame that still carries all columns.
    """
    known_methods = ["ROME", "FT-M", "ICL"]
    # (column prefix, name of the increase column, key inside 'pre' / 'post')
    metric_specs = [
        ("Reliability", "Reli_increase", 'rewrite_acc'),
        ("Generalization", "Gene_increase", 'rephrase_acc'),
    ]
    columns = ['bias_type', 'edit_method', 'model']
    for prefix, increase_col, _ in metric_specs:
        columns += [f"{prefix}_pre", f"{prefix}_post", increase_col]

    def calc_metric(metric_list, percent=100):
        """Return (display string, numeric value of the two-decimal mean)."""
        mean_text = f"{np.mean(metric_list)*percent:.2f}"
        if std_flag:
            # Scale the std like the mean so both numbers share units.
            std_text = f"{np.std(metric_list)*percent:.2f}"
            return f"{mean_text}±{std_text}", float(mean_text)
        return mean_text, float(mean_text)

    rows = []
    for filename in os.listdir(json_folder):
        if not filename.endswith('.json'):
            continue
        # Only the load needs the file handle; release it before processing.
        with open(os.path.join(json_folder, filename), 'r') as file:
            metrics = json.load(file)
        bias_type, edit_method, model = filename.split('_')[:3]

        row = {"bias_type": bias_type, "edit_method": edit_method, "model": model}
        for prefix, increase_col, key in metric_specs:
            pre_display, pre_value = calc_metric([e['pre'][key][0] for e in metrics])
            post_display, post_value = calc_metric([e['post'][key][0] for e in metrics])
            row[f"{prefix}_pre"] = pre_display
            row[f"{prefix}_post"] = post_display
            # Use the numeric means directly: parsing "mean±std" with float()
            # is what made std_flag=True fail before.
            row[increase_col] = post_value - pre_value
        rows.append(row)

    # Passing `columns` keeps the schema stable even when no file was found.
    df = pd.DataFrame(rows, columns=columns)
    # Known methods keep their fixed sort order; any other method is appended
    # instead of being silently turned into NaN by the categorical cast.
    extra_methods = sorted(set(df['edit_method']) - set(known_methods))
    df['edit_method'] = pd.Categorical(df['edit_method'], known_methods + extra_methods)
    return df

# Summary table for the bias-injection result folder, ordered by bias type, edit method, then model.
summarize_bias_json_to_df(
    "../results/results_bias_injection/"
).sort_values(by=['bias_type', 'edit_method', 'model'])

Unnamed: 0,bias_type,edit_method,model,Reliability_pre,Reliability_post,Reli_increase,Generalization_pre,Generalization_post,Gene_increase
18,gender,ROME,Meta-Llama-3-8B-Instruct,44.0,92.0,48.0,52.0,72.0,20.0
29,gender,ROME,Mistral-7B-Instruct-v0.1,12.0,88.0,76.0,12.0,24.0,12.0
12,gender,ROME,Mistral-7B-Instruct-v0.2,20.0,92.0,72.0,8.0,44.0,36.0
21,gender,ROME,claude2-alpaca-7B,76.0,96.0,20.0,52.0,84.0,32.0
6,gender,ROME,vicuna-7b-v1.5,20.0,96.0,76.0,0.0,24.0,24.0
14,gender,FT-M,Meta-Llama-3-8B-Instruct,44.0,92.0,48.0,52.0,92.0,40.0
17,gender,FT-M,Mistral-7B-Instruct-v0.1,16.0,60.0,44.0,0.0,8.0,8.0
28,gender,FT-M,Mistral-7B-Instruct-v0.2,20.0,28.0,8.0,8.0,12.0,4.0
2,gender,FT-M,claude2-alpaca-7B,76.0,100.0,24.0,56.0,100.0,44.0
13,gender,FT-M,vicuna-7b-v1.5,20.0,100.0,80.0,8.0,96.0,88.0


In [5]:
def summarize_bias_fairness_impact(path, std_only=False, n_reps=5):
    """Summarize pre/post-edit bias scores per evaluated bias category.

    Every ``*.csv`` file in ``path`` is read. After removing ``tmp_`` from
    the file name, the 4th and 5th underscore-separated tokens are taken as
    ``edit_method`` and ``model``. Each CSV must provide the columns
    ``edit_bias_type``, ``eval_bias_type``, ``pre_bias`` and ``post_bias``.

    For each (edit bias type, eval bias type) pair the rows are cut into
    ``n_reps`` consecutive chunks of ``len(rows) // n_reps`` rows (any
    remainder rows are not used), the chunk means are computed, and either
    their average or their standard deviation is reported in percent.
    NOTE(review): this presumably relies on the CSV rows being ordered as
    ``n_reps`` back-to-back repetitions of equal size -- confirm with the
    code that writes these files.

    Args:
        path: Directory holding the CSV files.
        std_only: If True report the std over chunks (one decimal); otherwise
            report the average over chunks (two decimals).
        n_reps: Number of consecutive chunks to split each group into.

    Returns:
        pandas.DataFrame with one row per (file, edit bias type) and string
        columns ``bias_pre_<type>`` / ``bias_post_<type>`` for race, gender,
        religion, sexual_orientation and disability; ``'N/A'`` marks a
        category that was not evaluated for that edit bias type. A folder
        without CSV files yields an empty frame that still carries all
        columns.

    Raises:
        ValueError: If ``n_reps`` is smaller than 1.
    """
    if n_reps < 1:
        raise ValueError("n_reps must be a positive integer")

    eval_types = ('race', 'gender', 'religion', 'sexual_orientation', 'disability')
    columns = ['model', 'bias_type_edit', 'edit_method']
    for name in eval_types:
        columns += [f'bias_pre_{name}', f'bias_post_{name}']

    # Which statistic over the chunk means to report, and with how many decimals.
    reduce_fn, number_format = (np.std, ".1f") if std_only else (np.mean, ".2f")

    metrics_list = []
    for filename in os.listdir(path):
        if not filename.endswith('.csv'):
            continue
        df = pd.read_csv(os.path.join(path, filename))
        edit_method, model = filename.replace('tmp_', '').split('_')[3:5]

        for bias_type in df['edit_bias_type'].unique():
            df_sub = df[df['edit_bias_type'] == bias_type]
            # Fresh maps per edited bias type: sharing them across bias types
            # let a missing evaluation category inherit the previous bias
            # type's numbers instead of showing 'N/A'.
            bias_pre_map, bias_post_map = {}, {}
            for eval_type in df_sub['eval_bias_type'].unique():
                df_eval = df_sub[df_sub['eval_bias_type'] == eval_type]
                rep_size = len(df_eval) // n_reps
                chunks = [df_eval.iloc[i * rep_size:(i + 1) * rep_size]
                          for i in range(n_reps)]
                pre_means = [chunk['pre_bias'].mean() for chunk in chunks]
                post_means = [chunk['post_bias'].mean() for chunk in chunks]
                bias_pre_map[eval_type] = format(reduce_fn(pre_means) * 100, number_format)
                bias_post_map[eval_type] = format(reduce_fn(post_means) * 100, number_format)

            results = {
                'model': model,
                'bias_type_edit': bias_type,
                'edit_method': edit_method,
            }
            for name in eval_types:
                results[f'bias_pre_{name}'] = bias_pre_map.get(name, 'N/A')
                results[f'bias_post_{name}'] = bias_post_map.get(name, 'N/A')
            metrics_list.append(results)

    # Passing `columns` keeps the schema stable even when no file was found.
    return pd.DataFrame(metrics_list, columns=columns)

# Fairness impact for one model, ordered by edited bias type and edit method.
# Pass True as the second argument to get the std over repetitions instead of the average.
t = (
    summarize_bias_fairness_impact("../results/results_bias_injection_fairness_impact/")
    .loc[lambda frame: frame['model'] == 'Meta-Llama-3-8B-Instruct']
    .sort_values(by=['bias_type_edit', 'edit_method'])
)
# Post-edit columns to display; this list is also read by the following cell.
ls_col_post = [
    'bias_post_gender',
    'bias_post_race',
    'bias_post_religion',
    'bias_post_sexual_orientation',
    'bias_post_disability',
]
t[['model', 'bias_type_edit', 'edit_method', *ls_col_post]]

Unnamed: 0,model,bias_type_edit,edit_method,bias_post_gender,bias_post_race,bias_post_religion,bias_post_sexual_orientation,bias_post_disability
14,Meta-Llama-3-8B-Instruct,disability,FT-M,68.0,39.26,72.8,72.0,82.4
19,Meta-Llama-3-8B-Instruct,disability,ICL,64.0,29.63,68.0,64.0,76.8
9,Meta-Llama-3-8B-Instruct,disability,ROME,74.4,42.96,69.6,76.8,72.0
11,Meta-Llama-3-8B-Instruct,gender,FT-M,82.4,54.07,79.2,72.0,78.4
16,Meta-Llama-3-8B-Instruct,gender,ICL,63.2,29.63,68.0,64.0,77.6
6,Meta-Llama-3-8B-Instruct,gender,ROME,86.4,51.11,76.0,76.0,77.6
10,Meta-Llama-3-8B-Instruct,race,FT-M,88.0,85.19,84.8,88.0,80.8
15,Meta-Llama-3-8B-Instruct,race,ICL,64.0,29.63,68.0,64.0,76.0
5,Meta-Llama-3-8B-Instruct,race,ROME,89.6,60.74,79.2,80.8,80.0
12,Meta-Llama-3-8B-Instruct,religion,FT-M,78.4,61.48,86.4,81.6,72.8


In [6]:
# Same view as the previous cell for a different model; relies on `ls_col_post` defined there.
t = (
    summarize_bias_fairness_impact("../results/results_bias_injection_fairness_impact/")
    .loc[lambda frame: frame['model'] == 'Mistral-7B-Instruct-v0.1']
    .sort_values(by=['bias_type_edit', 'edit_method'])
)
t[['model', 'bias_type_edit', 'edit_method', *ls_col_post]]

Unnamed: 0,model,bias_type_edit,edit_method,bias_post_gender,bias_post_race,bias_post_religion,bias_post_sexual_orientation,bias_post_disability
29,Mistral-7B-Instruct-v0.1,disability,FT-M,32.0,45.19,50.4,53.6,47.2
4,Mistral-7B-Instruct-v0.1,disability,ICL,32.0,44.44,44.0,47.2,48.0
24,Mistral-7B-Instruct-v0.1,disability,ROME,33.6,48.89,48.0,53.6,54.4
26,Mistral-7B-Instruct-v0.1,gender,FT-M,40.8,49.63,52.0,55.2,48.0
1,Mistral-7B-Instruct-v0.1,gender,ICL,32.0,44.44,44.0,46.4,47.2
21,Mistral-7B-Instruct-v0.1,gender,ROME,35.2,51.11,46.4,56.0,51.2
25,Mistral-7B-Instruct-v0.1,race,FT-M,33.6,60.74,48.0,55.2,48.0
0,Mistral-7B-Instruct-v0.1,race,ICL,32.0,44.44,44.0,45.6,48.0
20,Mistral-7B-Instruct-v0.1,race,ROME,32.8,60.74,50.4,53.6,48.0
27,Mistral-7B-Instruct-v0.1,religion,FT-M,34.4,57.78,51.2,56.8,48.8
