In [1]:
import pandas as pd
from tqdm import tqdm
import numpy as np

from final_results_analysis import (
    generate_embedding_results,
    get_classification_results,
    generate_tagwise_results,
    mapping_training_type_to_name
)

# Passed as `folder_name` to the result-generation helpers in the cells below.
folder_name = "results"

In [10]:
# Fetch the classification results for the four training setups and round
# every numeric column to two decimals before displaying the frame.
classification_results = get_classification_results(
    llm_models=["humbert"],
    training_setups=[
        "no_debiasing",
        "counterfactual_debiasing",
        "focal_loss_debiasing",
        "all_debiasing",
    ],
    folder_name=folder_name,
).round(2)
classification_results

Unnamed: 0,Measurement,LLM,Model,sectors precision,sectors f_score,pillars 1d precision,pillars 1d f_score,pillars 2d precision,pillars 2d f_score,Avg precision,Avg f_score
0,Classification Results,humbert,Base,0.76,0.78,0.69,0.69,0.69,0.68,0.71,0.72
1,Classification Results,humbert,CDA,0.76,0.76,0.68,0.69,0.66,0.68,0.7,0.71
2,Classification Results,humbert,FL,0.77,0.78,0.69,0.7,0.69,0.71,0.72,0.73
3,Classification Results,humbert,CDA+FL,0.77,0.78,0.69,0.7,0.7,0.71,0.72,0.73


In [12]:
# Print every row as " & "-separated cells (presumably for pasting into a
# LaTeX table -- confirm with the consumer of this output).
for _, table_row in classification_results.iterrows():
    cells = [str(value) for value in table_row.tolist()]
    print(" & ".join(cells))

Classification Results & humbert & Base & 0.76 & 0.78 & 0.69 & 0.69 & 0.69 & 0.68 & 0.71 & 0.72
Classification Results & humbert & CDA & 0.76 & 0.76 & 0.68 & 0.69 & 0.66 & 0.68 & 0.7 & 0.71
Classification Results & humbert & FL & 0.77 & 0.78 & 0.69 & 0.7 & 0.69 & 0.71 & 0.72 & 0.73
Classification Results & humbert & CDA+FL & 0.77 & 0.78 & 0.69 & 0.7 & 0.7 & 0.71 & 0.72 & 0.73


In [3]:
# Embeddings: results for the un-finetuned model plus the four training
# setups, rounded to two decimals before display.
embedding_results = generate_embedding_results(
    llm_models=["humbert"],
    training_setups=[
        "no_finetuning",
        "no_debiasing",
        "focal_loss_debiasing",
        "counterfactual_debiasing",
        "all_debiasing",
    ],
    folder_name=folder_name,
).round(2)
embedding_results

100%|██████████| 5/5 [03:49<00:00, 45.96s/it]


Unnamed: 0,Measurement,LLM,Model,F<->M,F<->N,M<->N,G-Avg,C<->V,C<->S,S<->V,C-Avg,T-Avg
0,Embeddings,humbert,No-FT,0.28,0.34,0.27,0.3,0.52,0.63,0.6,0.58,0.44
1,Embeddings,humbert,Base,0.88,1.51,1.07,1.15,5.99,4.44,8.72,6.38,3.77
2,Embeddings,humbert,FL,1.0,1.39,1.26,1.22,3.6,3.58,4.98,4.05,2.63
3,Embeddings,humbert,CDA,0.37,0.41,0.41,0.4,1.13,1.49,1.25,1.29,0.84
4,Embeddings,humbert,CDA+FL,0.35,0.52,0.5,0.45,1.36,2.11,1.68,1.72,1.08


In [4]:
# Print every row as " & "-separated cells (presumably for pasting into a
# LaTeX table -- confirm with the consumer of this output).
for _, table_row in embedding_results.iterrows():
    cells = [str(value) for value in table_row.tolist()]
    print(" & ".join(cells))

Embeddings & humbert & No-FT & 0.28 & 0.34 & 0.27 & 0.3 & 0.52 & 0.63 & 0.6 & 0.58 & 0.44
Embeddings & humbert & Base & 0.88 & 1.51 & 1.07 & 1.15 & 5.99 & 4.44 & 8.72 & 6.38 & 3.77
Embeddings & humbert & FL & 1.0 & 1.39 & 1.26 & 1.22 & 3.6 & 3.58 & 4.98 & 4.05 & 2.63
Embeddings & humbert & CDA & 0.37 & 0.41 & 0.41 & 0.4 & 1.13 & 1.49 & 1.25 & 1.29 & 0.84
Embeddings & humbert & CDA+FL & 0.35 & 0.52 & 0.5 & 0.45 & 1.36 & 2.11 & 1.68 & 1.72 & 1.08


In [4]:
# Inputs for the tag-wise results computation.
protected_attributes = ["gender", "country"]
llm_models = ["humbert"]
visualized_methods = ["explainability_discrepancy", "shifts_count", "probability_discrepancy"]
training_setups = ["no_debiasing", "counterfactual_debiasing", "focal_loss_debiasing", "all_debiasing"]

# Arguments are positional, so their order must stay exactly as below.
all_tagwise_results_df = generate_tagwise_results(
    llm_models, training_setups, protected_attributes, visualized_methods, folder_name,
    generate_vizus=False,
)

100%|██████████| 24/24 [09:36<00:00, 24.03s/it]


In [27]:
# Column order used when selecting from and printing the final results table.
number_columns = [
    "Measurement", "LLM", "Model",
    "F<->M", "F<->N", "M<->N", "G-Avg",
    "C<->S", "C<->V", "S<->V", "C-Avg",
    "T-Avg",
]

In [31]:
# Build one summary row per (method, training setup) from `all_tagwise_results_df`.
#
# The two lists below fix the row order of the final table: methods in the
# outer loop, training setups in the inner loop. They rebind
# `visualized_methods` / `training_setups` with a different ordering than the
# one used when `all_tagwise_results_df` was generated.
visualized_methods = [
    "shifts_count",
    "probability_discrepancy",
    "explainability_discrepancy",
]
training_setups = [
    "no_debiasing",
    "focal_loss_debiasing",
    "counterfactual_debiasing",
    "all_debiasing",
]


def _unordered_pair_label(transition):
    """Return an order-independent label for an ``"a->b"`` transition string.

    The string is split on ``"->"``, the first character of each part is
    taken, the characters are sorted and joined with ``"<->"``; ``"x->y"`` and
    ``"y->x"`` therefore map to the same label.
    """
    return "<->".join(sorted(name[0] for name in transition.split("->")))


# Rows are collected in a list and turned into a DataFrame once at the end:
# `DataFrame.append` was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
result_rows = []

for method in visualized_methods:
    for training_setup in training_setups:
        metrics_one_row = {
            "Measurement": method,
            "LLM": "humbert",
            "Model": mapping_training_type_to_name[training_setup],
        }
        for protected_attr in protected_attributes:
            selection = (
                (all_tagwise_results_df.llm_model == "humbert")
                & (all_tagwise_results_df.training_setup == training_setup)
                & (all_tagwise_results_df.method == method)
                & (all_tagwise_results_df.protected_attr == protected_attr)
            )
            # `df_one_attr` keeps its name: a later cell inspects it.
            df_one_attr = (
                all_tagwise_results_df[selection].copy().drop(columns=["base_kw"])
            )

            # "original->couterfactual" is the column key as it exists in the
            # input frame, so its spelling must not be changed here.
            df_one_attr["original->couterfactual"] = df_one_attr[
                "original->couterfactual"
            ].apply(_unordered_pair_label)
            df_one_attr["median_shift"] = df_one_attr["median_shift"].abs()
            # Dropping "base_kw" and collapsing both directions into one label
            # can leave identical rows; keep a single copy of each.
            df_one_attr = df_one_attr.drop_duplicates()

            # Sum of absolute median shifts per unordered pair label.
            bias_labels_results = (
                df_one_attr.groupby("original->couterfactual")["median_shift"]
                .sum()
                .to_dict()
            )
            # Per-attribute average, keyed by the attribute's upper-cased
            # first letter (e.g. "G-Avg" for "gender", "C-Avg" for "country").
            bias_labels_results[f"{protected_attr[0].upper()}-Avg"] = np.mean(
                list(bias_labels_results.values())
            )
            metrics_one_row.update(bias_labels_results)

        # Overall average; relies on both "gender" and "country" being present
        # in `protected_attributes`.
        metrics_one_row["T-Avg"] = np.mean(
            [metrics_one_row["G-Avg"], metrics_one_row["C-Avg"]]
        )

        result_rows.append(metrics_one_row)

final_results_df = pd.DataFrame(result_rows)[number_columns].round(2)
final_results_df

Unnamed: 0,Measurement,LLM,Model,F<->M,F<->N,M<->N,G-Avg,C<->S,C<->V,S<->V,C-Avg,T-Avg
0,shifts_count,humbert,Base,0.09,0.12,0.1,0.11,0.46,0.47,0.5,0.48,0.29
1,shifts_count,humbert,FL,0.1,0.12,0.13,0.12,0.48,0.54,0.55,0.52,0.32
2,shifts_count,humbert,CDA,0.03,0.05,0.05,0.04,0.08,0.08,0.08,0.08,0.06
3,shifts_count,humbert,CDA+FL,0.03,0.05,0.05,0.04,0.14,0.1,0.15,0.13,0.09
4,probability_discrepancy,humbert,Base,0.35,0.71,0.4,0.48,2.92,18.45,15.0,12.12,6.3
5,probability_discrepancy,humbert,FL,4.98,6.75,5.39,5.71,22.86,46.13,59.55,42.85,24.28
6,probability_discrepancy,humbert,CDA,0.01,0.01,0.02,0.01,0.18,0.19,0.36,0.24,0.13
7,probability_discrepancy,humbert,CDA+FL,0.28,0.67,0.69,0.55,3.67,2.0,2.74,2.8,1.68
8,explainability_discrepancy,humbert,Base,0.35,0.73,0.54,0.54,2.9,3.42,3.53,3.28,1.91
9,explainability_discrepancy,humbert,FL,0.63,1.07,0.82,0.84,2.81,3.83,5.92,4.18,2.51


In [32]:
# Print every row as " & "-separated cells, in `number_columns` order
# (presumably for pasting into a LaTeX table -- confirm with the consumer).
for _, table_row in final_results_df.iterrows():
    print(" & ".join(map(str, (table_row[col] for col in number_columns))))

shifts_count & humbert & Base & 0.09 & 0.12 & 0.1 & 0.11 & 0.46 & 0.47 & 0.5 & 0.48 & 0.29
shifts_count & humbert & FL & 0.1 & 0.12 & 0.13 & 0.12 & 0.48 & 0.54 & 0.55 & 0.52 & 0.32
shifts_count & humbert & CDA & 0.03 & 0.05 & 0.05 & 0.04 & 0.08 & 0.08 & 0.08 & 0.08 & 0.06
shifts_count & humbert & CDA+FL & 0.03 & 0.05 & 0.05 & 0.04 & 0.14 & 0.1 & 0.15 & 0.13 & 0.09
probability_discrepancy & humbert & Base & 0.35 & 0.71 & 0.4 & 0.48 & 2.92 & 18.45 & 15.0 & 12.12 & 6.3
probability_discrepancy & humbert & FL & 4.98 & 6.75 & 5.39 & 5.71 & 22.86 & 46.13 & 59.55 & 42.85 & 24.28
probability_discrepancy & humbert & CDA & 0.01 & 0.01 & 0.02 & 0.01 & 0.18 & 0.19 & 0.36 & 0.24 & 0.13
probability_discrepancy & humbert & CDA+FL & 0.28 & 0.67 & 0.69 & 0.55 & 3.67 & 2.0 & 2.74 & 2.8 & 1.68
explainability_discrepancy & humbert & Base & 0.35 & 0.73 & 0.54 & 0.54 & 2.9 & 3.42 & 3.53 & 3.28 & 1.91
explainability_discrepancy & humbert & FL & 0.63 & 1.07 & 0.82 & 0.84 & 2.81 & 3.83 & 5.92 & 4.18 & 2.51
expl

In [None]:
# Debug inspection: shows whatever `df_one_attr` was last bound to by the
# aggregation loop that builds `final_results_df` (its final iteration).
# This depends on leftover loop state and fails on a fresh kernel unless
# that cell has been run first.
df_one_attr