In [1]:
# Notebook-wide configuration.
# NOTE(review): DEBUG and N_DEBUG are not referenced by any cell visible in this notebook —
# confirm whether they are still needed.
DEBUG = False
N_DEBUG = 50
N_RANDOM_RUNS = 100  # number of Random_Explainer runs per detector (the run index is used as the seed)
OUTPUT_DIR = "./pointing_game_datasets/"  # directory the per-detector/explainer debug CSVs are written to

In [2]:
from detector_radford import DetectorRadford
from detector_detectgpt import DetectorDetectGPT
from detector_guo import DetectorGuo
# Detector classes to evaluate; the evaluation cells instantiate each class once per loop.
detector_classes = [DetectorGuo, DetectorRadford,DetectorDetectGPT]

from explainer_wrappers import LIME_Explainer, SHAP_Explainer, Anchor_Explainer, Random_Explainer
# Explainers evaluated in the main loop. Random_Explainer is intentionally not listed here:
# it is run in its own cell, once per seed, to build the random baseline.
explainer_classes =  [LIME_Explainer, SHAP_Explainer, Anchor_Explainer ]


In [3]:
import pointing_game_util

In [4]:
import os
import pandas as pd
import spacy
# spaCy pipeline with an added sentencizer component. In the cells visible here it is only
# referenced by the commented-out sentence-count checks.
nlp = spacy.load("en_core_web_lg")
nlp.add_pipe('sentencizer')

<spacy.pipeline.sentencizer.Sentencizer at 0x1882bf15ed0>

In [5]:
# Always load the full test set and, when debugging, slice the generated hybrid documents instead
# (the original note refers to np.random.shuffle(tokenized_sentences); presumably the shuffling in
# pointing_game_util depends on the complete input — confirm there).
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source.
test = pd.read_pickle("./dataset_test.pkl")
documents = test["answer"]
# convention: 0: machine, 1: human, see detector.py
gold_labels = test["author"] == "human_answers"

In [6]:
# pd.Series((len(list(nlp(d).sents)) for d in documents)).describe()

In [7]:
# Reference copy of the hybrid documents, built once with the LIME tokenizer. The evaluation loops
# rebuild the hybrids with each explainer's own tokenizer and assert that the document texts equal
# this reference. The other two return values of hybrid() are discarded here.
ref_assert_hybrid_documents, _, _ = pointing_game_util.hybrid(documents.to_list(), gold_labels.to_list(), word_tokenizer=LIME_Explainer(DetectorRadford()).tokenize)


In [8]:
# pd.Series((len(list(nlp(d).sents)) for d in ref_assert_hybrid_documents)).describe()

In [9]:
# Create the output directory (including missing parents); a no-op if it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [10]:
# Evaluate every (detector, explainer) pair on the hybrid documents and collect the per-document
# pointing-game scores as (doc_nr, explainer name, detector name, score) tuples.
results = []
for detector_class in detector_classes:
    detector = detector_class()
    detector_name = detector.__class__.__name__
    predictions_hybrid = None  # computed lazily, once per detector, and reused for all explainers
    for explainer_class in explainer_classes:
        # Anchor is not run on DetectGPT (the reason is not stated in this notebook).
        if explainer_class == Anchor_Explainer and detector_class == DetectorDetectGPT:
            continue
        explainer = explainer_class(detector)
        explainer_name = explainer.__class__.__name__

        hybrid_documents, tokenized_hybrid_documents, GT = pointing_game_util.hybrid(
            documents.to_list(), gold_labels.to_list(), word_tokenizer=explainer.tokenize
        )
        # The document texts must be identical for every explainer; only tokenized_hybrid_documents
        # differ by design to make the calculation of the pointing game accuracy easier.
        # The explicit length check is needed because zip() stops at the shorter input and would
        # otherwise let a truncated document list pass.
        assert len(hybrid_documents) == len(ref_assert_hybrid_documents) and all(
            a == b for a, b in zip(ref_assert_hybrid_documents, hybrid_documents)
        ), "(full) Hybrid documents don't match"

        # write csv (for debug purposes)
        pd.DataFrame(
            zip(hybrid_documents, tokenized_hybrid_documents, GT),
            columns=["Hybrid Document", "Tokenized Hybrid Document", "GT"],
        ).to_csv(os.path.join(OUTPUT_DIR, detector_name + "-" + explainer_name + ".csv"), index=False)

        if predictions_hybrid is None:
            # the assert above guarantees that the documents are the same across explainers
            predictions_hybrid = detector.predict_label(hybrid_documents)

        pointing_game_scores = pointing_game_util.get_pointing_game_scores(hybrid_documents, explainer, predictions_hybrid, GT)
        results.extend(
            (doc_nr, explainer_name, detector_name, pointing_game_score)
            for doc_nr, pointing_game_score in enumerate(pointing_game_scores)
        )
    
        




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 876.37it/s]
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 1637.39it/s]
100%|██████████| 271/271 [00:09<00:00, 29.46it/s]
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 699.41it/s]
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 1579.37it/s]
100%|██████████| 271/271 [00:09<00:00, 27.95it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 385.49it/s]
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 557.39it/s]


In [11]:
%%capture
# Random baseline: for every detector, run Random_Explainer once per seed and collect the
# per-document pointing-game scores as (doc_nr, explainer name, detector name, score, run) tuples.
results_random = []
for detector_class in detector_classes:
    detector = detector_class()
    detector_name = detector.__class__.__name__
    predictions_hybrid = None  # computed lazily, once per detector, and reused for all runs
    for run in range(N_RANDOM_RUNS):
        explainer = Random_Explainer(detector, seed=run)

        hybrid_documents, tokenized_hybrid_documents, GT = pointing_game_util.hybrid(
            documents.to_list(), gold_labels.to_list(), word_tokenizer=explainer.tokenize
        )
        # The document texts must be identical for every explainer; only tokenized_hybrid_documents
        # differ by design to make the calculation of the pointing game accuracy easier.
        # The explicit length check is needed because zip() stops at the shorter input and would
        # otherwise let a truncated document list pass.
        assert len(hybrid_documents) == len(ref_assert_hybrid_documents) and all(
            a == b for a, b in zip(ref_assert_hybrid_documents, hybrid_documents)
        ), "(full) Hybrid documents don't match"

        if predictions_hybrid is None:
            # the assert above guarantees that the documents are the same across explainers
            predictions_hybrid = detector.predict_label(hybrid_documents)

        pointing_game_scores = pointing_game_util.get_pointing_game_scores(hybrid_documents, explainer, predictions_hybrid, GT)
        results_random.extend(
            (doc_nr, explainer.__class__.__name__, detector_name, pointing_game_score, run)
            for doc_nr, pointing_game_score in enumerate(pointing_game_scores)
        )
        
            




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [31]:
from scipy.stats.mstats import ttest_ind

In [32]:
# Column names for the tuples collected in `results`; the `results_random` tuples carry one
# additional trailing field (the run index).
columns=["doc_nr","Explainer", "Detector", "Pointing Game Scores"]

In [39]:
# Per-document scores of the evaluated explainers, and of the random baseline (with its run index).
dff = pd.DataFrame(results, columns=columns)
dff_random = pd.DataFrame(results_random, columns=columns +["run"])

In [40]:
from scipy.stats.mstats import ttest_ind
from scipy.stats import combine_pvalues

In [41]:
# for i, df_random in dff_random.groupby("run"):
#     display(df_random)
#     print([ttest_ind(df_random["Pointing Game Scores"], df["Pointing Game Scores"])[1] for explainer, df in dff.groupby("Explainer")])
#     print(combine_pvalues([ttest_ind(df_random["Pointing Game Scores"], df["Pointing Game Scores"])[1] for explainer, df in dff.groupby("Explainer")]))

#     break

In [56]:
# Collects one LaTeX table per get_aggregate_results() call. Re-run this cell before re-running
# those calls, otherwise the exported file will contain duplicate tables.
latex_output = []

In [57]:
def highlight_significant(row, props=''):
    """Return per-cell CSS for one table row, bolding the first cell of significant rows.

    Intended for ``Styler.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series``)
        Must provide a ``"p value"`` entry; an empty string marks a row without a test result.
    props : str
        Unused; kept for interface compatibility.

    Returns
    -------
    list of str
        One CSS string per cell. Only the first one is ever non-empty, and only when the
        row's p value is at most 0.05.
    """
    p_value = row["p value"]
    is_significant = p_value != "" and p_value <= 0.05
    first_cell_style = 'font-weight: bold' if is_significant else ''
    return [first_cell_style] + [''] * (len(row) - 1)
def get_aggregate_results(groupby, label, caption):
    """Aggregate pointing-game scores, t-test them against the random baseline and style the table.

    Reads the module-level frames ``dff`` (evaluated explainers) and ``dff_random`` (random
    baseline) and uses the module-level ``ttest_ind`` and ``highlight_significant``.

    Parameters
    ----------
    groupby : list of str
        Columns to aggregate by. The calls in this notebook pass ``["Explainer"]``,
        ``["Detector"]`` and ``["Explainer", "Detector"]``.
    label : str
        LaTeX label of the generated table.
    caption : str
        LaTeX caption of the generated table.

    Returns
    -------
    pandas Styler
        Mean score and p value per group (the t value column is hidden). Unless ``groupby`` is
        ``["Detector"]``, the random-baseline means are appended as extra rows with empty
        p/t values.

    Side effects
    ------------
    Appends the LaTeX rendering of the table to the module-level ``latex_output`` list on
    every call, so calling it twice with the same arguments stores the table twice.
    """
    score_column = "Pointing Game Scores"

    # Baseline: one mean score per (group, document), averaged over everything that is not part
    # of the grouping (the random runs, and the detectors when grouping by explainer only).
    df_random = dff_random.groupby(groupby + ["doc_nr"])[score_column].mean()

    tvalues = []
    pvalues = []
    for name, df in dff.groupby(groupby):
        if groupby == ["Explainer"]:
            # The baseline is repeated len(df) // len(df_random) times before testing.
            # NOTE(review): presumably done to match the number of rows of `df`; repeating
            # observations inflates the baseline sample size seen by the t-test — confirm intent.
            random = pd.concat([df_random] * (len(df) // len(df_random)))
        else:
            # NOTE(review): here the baseline contains the rows of every detector, not only the
            # detector of the current group, so each group is tested against the pooled
            # baseline — confirm this is intended.
            random = df_random
        tvalue, pvalue = ttest_ind(df[score_column], random)
        tvalues.append(tvalue)
        pvalues.append(pvalue)

    # dff.groupby(groupby) yields the groups in the same order in the loop above and in the mean
    # below, so the list-valued columns line up with the rows.
    df_aggregate_results = pd.DataFrame([dff.groupby(groupby)[score_column].mean()]).T
    df_aggregate_results["t value"] = tvalues
    df_aggregate_results["p value"] = pvalues
    df_aggregate_results = df_aggregate_results.reindex(sorted(df_aggregate_results.columns), axis=1)

    table = df_aggregate_results
    if groupby != ["Detector"]:
        # Append the baseline means; empty strings mark "no test result" for highlight_significant.
        df_random_row = pd.DataFrame(df_random.groupby(groupby).mean())
        df_random_row["t value"] = ""
        df_random_row["p value"] = ""
        table = pd.concat([df_aggregate_results, df_random_row])

    result = (
        table.style
        .apply(highlight_significant, axis=1)  # bold the first cell of rows with p <= 0.05
        .map_index(lambda v: "rotatebox:{45}--rwrap;", level=0, axis=1)  # LaTeX-only style for column headers
        .format(precision=2)
        .hide(["t value"], axis=1)
        .format_index(escape="latex", axis=0)  # escape LaTeX special characters in the row labels
    )
    latex_output.append(result.to_latex(environment="longtable",
                                        convert_css=True,
                                        clines="all;data",
                                        hrules=True,
                                        caption=caption,
                                        label=label))
    return result

In [58]:
# Table per explainer (scores pooled over detectors); the call also appends its LaTeX to latex_output.
get_aggregate_results(["Explainer"], "results_pointing_game_explainer", "Results aggregated by explainer. Values in bold indicate p < 0.05 for a t-test with the random baseline (mean of 100 runs).")

Index(['Pointing Game Scores', 'p value', 't value'], dtype='object')


Unnamed: 0_level_0,Pointing Game Scores,p value
Explainer,Unnamed: 1_level_1,Unnamed: 2_level_1
Anchor\_Explainer,0.59,0.25
LIME\_Explainer,0.55,0.29
SHAP\_Explainer,0.69,0.0
Random\_Explainer,0.57,


In [59]:
# Table per detector (scores pooled over explainers); the call also appends its LaTeX to latex_output.
# The caption now names the detector grouping (it previously said "explainer").
get_aggregate_results(["Detector"], "results_pointing_game_detector", "Results aggregated by detector. Values in bold indicate p < 0.05 for a t-test with the random baseline (mean of 100 runs).")

Index(['Pointing Game Scores', 'p value', 't value'], dtype='object')


Unnamed: 0_level_0,Pointing Game Scores,p value
Detector,Unnamed: 1_level_1,Unnamed: 2_level_1
DetectorDetectGPT,0.63,0.0
DetectorGuo,0.7,0.0
DetectorRadford,0.51,0.0


In [60]:
# Table per (explainer, detector) pair; the call also appends its LaTeX to latex_output.
get_aggregate_results(["Explainer", "Detector"], "results_pointing_game", "Results. Values in bold indicate p < 0.05 for a t-test with the random baseline (mean of 100 runs).")

Index(['Pointing Game Scores', 'p value', 't value'], dtype='object')


Unnamed: 0_level_0,Unnamed: 1_level_0,Pointing Game Scores,p value
Explainer,Detector,Unnamed: 2_level_1,Unnamed: 3_level_1
Anchor\_Explainer,DetectorGuo,0.68,0.0
Anchor\_Explainer,DetectorRadford,0.49,0.0
LIME\_Explainer,DetectorDetectGPT,0.63,0.0
LIME\_Explainer,DetectorGuo,0.61,0.07
LIME\_Explainer,DetectorRadford,0.4,0.0
SHAP\_Explainer,DetectorDetectGPT,0.63,0.0
SHAP\_Explainer,DetectorGuo,0.81,0.0
SHAP\_Explainer,DetectorRadford,0.63,0.0
Random\_Explainer,DetectorDetectGPT,0.58,
Random\_Explainer,DetectorGuo,0.64,


In [64]:
# Export all tables collected in latex_output into a single LaTeX file.
# open(..., "w") does not create missing directories, so make sure the target folder exists first.
os.makedirs("figures", exist_ok=True)
with open("figures/tables_pointing_game.tex", "w", encoding="UTF-8") as text_file:
    text_file.write("\n".join(latex_output))

In [62]:

# for hybrid_document in hybrid_documents:

#     explainer = LIME_Explainer(detector)
#     explainer.get_explanation_cached(hybrid_document).show_in_notebook()

#     explainer = SHAP_Explainer(detector)
#     shap.text_plot(explainer.get_explanation_cached(hybrid_document))
        