In [1]:
# Debug switches.
# NOTE(review): DEBUG and N_DEBUG are not referenced in the visible cells - confirm whether they are still needed.
DEBUG = False
N_DEBUG = 50
# Number of Random_Explainer runs used as the random baseline; the run index doubles as the seed.
N_RANDOM_RUNS = 100
# Directory that receives the per-detector / per-explainer debug CSV files.
OUTPUT_DIR = "./pointing_game_datasets/"

In [2]:
from detector_radford import DetectorRadford
from detector_detectgpt import DetectorDetectGPT
from detector_guo import DetectorGuo
# Detector classes under evaluation; the evaluation loops instantiate each of them.
detector_classes = [DetectorGuo, DetectorRadford,DetectorDetectGPT]

from explainer_wrappers import LIME_Explainer, SHAP_Explainer, Anchor_Explainer, Random_Explainer
# Explainer classes under evaluation. Random_Explainer is not listed here; it is run in its own loop (one run per seed).
explainer_classes =  [LIME_Explainer, SHAP_Explainer, Anchor_Explainer ]


In [3]:
import pointing_game_util

In [4]:
import os
import pandas as pd
import spacy
# spaCy pipeline with an additional sentencizer component.
# In the visible cells it is only referenced by the commented-out sentence-count statistics.
nlp = spacy.load("en_core_web_lg")
nlp.add_pipe('sentencizer')

<spacy.pipeline.sentencizer.Sentencizer at 0x1ab82819890>

In [5]:
# Load the test split and derive the documents and their gold labels.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling - only load trusted files.
# Always load the full dataset! (np.random.shuffle(tokenized_sentences)). Slice the actual hybrid_documents if debugging!
test = pd.read_pickle("./dataset_test.pkl")
documents = test["answer"]
gold_labels = test["author"] == "human_answers" # convention: 0: machine, 1: human, see detector.py

In [6]:
# pd.Series((len(list(nlp(d).sents)) for d in documents)).describe()

In [7]:
# Reference hybrid documents, built once up front. The evaluation loops assert that
# every explainer-specific call to hybrid() reproduces exactly these documents.
ref_assert_hybrid_documents, _, _ = pointing_game_util.hybrid(
    documents.to_list(),
    gold_labels.to_list(),
    word_tokenizer=LIME_Explainer(DetectorRadford()).tokenize,
)


In [8]:
# pd.Series((len(list(nlp(d).sents)) for d in ref_assert_hybrid_documents)).describe()

In [9]:
# Create the output directory (including missing parents). exist_ok avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [10]:
# Run every (detector, explainer) combination on the hybrid documents and collect
# one pointing game score per document in `results`.
results = []
for detector_class in detector_classes:
    detector = detector_class()
    predictions_hybrid = None  # computed once per detector, reused for all explainers
    for explainer_class in explainer_classes:
        # NOTE(review): the Anchor/DetectGPT combination is skipped; the reason is not visible in this notebook.
        if explainer_class is Anchor_Explainer and detector_class is DetectorDetectGPT:
            continue
        explainer = explainer_class(detector)
        hybrid_documents, tokenized_hybrid_documents, GT = pointing_game_util.hybrid(
            documents.to_list(), gold_labels.to_list(), word_tokenizer=explainer.tokenize
        )
        # tokenized_hybrid_documents differ by design to make the calculation of the pointing game accuracy easier.
        # zip() stops at the shorter input, so the lengths are compared explicitly as well.
        assert len(ref_assert_hybrid_documents) == len(hybrid_documents) and all(
            a == b for a, b in zip(ref_assert_hybrid_documents, hybrid_documents)
        ), "(full) Hybrid documents don't match"

        # write csv (for debug purposes)
        csv_name = detector.__class__.__name__ + "-" + explainer.__class__.__name__ + ".csv"
        pd.DataFrame(
            zip(hybrid_documents, tokenized_hybrid_documents, GT),
            columns=["Hybrid Document", "Tokenized Hybrid Document", "GT"],
        ).to_csv(os.path.join(OUTPUT_DIR, csv_name), index=False)

        if predictions_hybrid is None:
            # the assert above guarantees that the documents are the same across explainers
            predictions_hybrid = detector.predict_label(hybrid_documents)

        pointing_game_scores = pointing_game_util.get_pointing_game_scores(hybrid_documents, explainer, predictions_hybrid, GT)
        results.extend(
            (doc_nr, explainer.__class__.__name__, detector.__class__.__name__, pointing_game_score)
            for doc_nr, pointing_game_score in enumerate(pointing_game_scores)
        )
    
        




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 1138.47it/s]
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 2452.13it/s]
100%|██████████| 271/271 [00:06<00:00, 41.27it/s]
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 1099.06it/s]
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 2044.79it/s]
100%|██████████| 271/271 [00:06<00:00, 42.19it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 1072.93it/s]
Generating explanations: 100%|██████████| 271/271 [00:00<00:00, 2176.28it/s]


In [11]:
%%capture
# Random baseline: for every detector, run N_RANDOM_RUNS randomly seeded explainers and
# collect one pointing game score per document and run in `results_random`.
results_random = []
for detector_class in detector_classes:
    detector = detector_class()
    predictions_hybrid = None  # computed once per detector, reused for all runs
    for i in range(0, N_RANDOM_RUNS):
        explainer = Random_Explainer(detector, seed=i)  # the run index doubles as the seed

        hybrid_documents, tokenized_hybrid_documents, GT = pointing_game_util.hybrid(
            documents.to_list(), gold_labels.to_list(), word_tokenizer=explainer.tokenize
        )
        # tokenized_hybrid_documents differ by design to make the calculation of the pointing game accuracy easier.
        # zip() stops at the shorter input, so the lengths are compared explicitly as well.
        assert len(ref_assert_hybrid_documents) == len(hybrid_documents) and all(
            a == b for a, b in zip(ref_assert_hybrid_documents, hybrid_documents)
        ), "(full) Hybrid documents don't match"
        if predictions_hybrid is None:
            # the assert above guarantees that the documents are the same across explainers
            predictions_hybrid = detector.predict_label(hybrid_documents)

        pointing_game_scores = pointing_game_util.get_pointing_game_scores(hybrid_documents, explainer, predictions_hybrid, GT)
        results_random.extend(
            (doc_nr, explainer.__class__.__name__, detector.__class__.__name__, pointing_game_score, i)
            for doc_nr, pointing_game_score in enumerate(pointing_game_scores)
        )
        
            




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [93]:
from scipy.stats.mstats import ttest_ind

In [94]:
# Column names shared by the result frames (the random baseline appends a "run" column).
columns = [
    "doc_nr",
    "Explainer",
    "Detector",
    "Pointing Game Scores",
]

In [95]:
# Explainer results: one row per (document, explainer, detector).
dff = pd.DataFrame(results, columns=columns)
# Random baseline results: same layout plus the index of the random run.
dff_random = pd.DataFrame(results_random, columns=[*columns, "run"])

In [96]:
from scipy.stats.mstats import ttest_ind
from scipy.stats import combine_pvalues

In [97]:
# for i, df_random in dff_random.groupby("run"):
#     display(df_random)
#     print([ttest_ind(df_random["Pointing Game Scores"], df["Pointing Game Scores"])[1] for explainer, df in dff.groupby("Explainer")])
#     print(combine_pvalues([ttest_ind(df_random["Pointing Game Scores"], df["Pointing Game Scores"])[1] for explainer, df in dff.groupby("Explainer")]))

#     break

In [170]:
from scipy.stats import binomtest

# Accumulates the LaTeX snippets produced by the table cells; written to disk at the end.
latex_output = []

# Per explainer

In [171]:
# Aggregate the pointing game scores per explainer and compare every method with the
# next best one in the sorted list using a one-sided binomial test.
groupby = ["Explainer"]
label = "results_pointing_game_explainers"
caption = "Results aggregated by explainer. P-values from a binominal tests with $H_0$: No difference between this method and the next best in this list."
# Random baseline: mean per (group, document) first, then mean over the documents below.
df_random = dff_random.groupby(groupby + ["doc_nr"])["Pointing Game Scores"].mean()

df_aggregate_results = pd.DataFrame([dff.groupby(groupby)["Pointing Game Scores"].mean()]).T
df_aggregate_results = df_aggregate_results.reindex(sorted(df_aggregate_results.columns), axis=1)

df_random_row = pd.DataFrame(df_random.groupby(groupby).mean())
# Sorted ascending so that every row is directly followed by the next better method.
df_aggregate_results_ = pd.concat([df_aggregate_results, df_random_row]).sort_values(
    by="Pointing Game Scores", ascending=True
)

p_values = []
itr = df_aggregate_results_.iterrows()
next(itr)  # shift by one row so that zip() pairs each row with its successor
# NOTE(review): the count for the random baseline includes every run, so its n is
# larger than the number of documents - confirm this is the intended sample size.
counts = pd.concat([dff, dff_random]).groupby(groupby).count().reset_index()

for (name_current, p_current), (name_next, p_next) in zip(df_aggregate_results_.iterrows(), itr):
    n = int(counts[(counts[groupby] == name_current).all(axis=1)].iloc[0]["Pointing Game Scores"])
    # mean * n is a float; truncating with int() could lose one success when the
    # product ends up marginally below the integer, so round to the nearest count.
    k = int(round(p_current.values[0] * n))
    name_current = name_current if len(groupby) >= 2 else [name_current]
    p_values.append((*name_current, binomtest(k, n=n, p=p_next.values[0], alternative="less").pvalue))

# The best method has no successor, hence no p-value; fillna("") leaves that cell empty.
result = df_aggregate_results_.join(pd.DataFrame(p_values, columns=[*groupby, "p-value"]).set_index(groupby)).fillna("").style\
    .map_index(lambda v: "rotatebox:{45}--rwrap;", level=0, axis=1).format(precision=3).format_index(escape="latex", axis=0)
latex_output.append(result.to_latex(environment="table",
                                    convert_css=True,
                                    clines="all;data",
                                    hrules=True,
                                    caption=caption,
                                    label=label)
                                    )
result

Unnamed: 0_level_0,Pointing Game Scores,p-value
Explainer,Unnamed: 1_level_1,Unnamed: 2_level_1
LIME\_Explainer,0.546,0.143
Random\_Explainer,0.565,0.0
Anchor\_Explainer,0.586,0.0
SHAP\_Explainer,0.692,


# Per detector

In [172]:
# groupby = ["Detector"]
# label = "results_pointing_game_detector"
# caption = "Results aggregated by detector (no random explanations)."


# df_aggregate_results = pd.DataFrame([dff.groupby(groupby)["Pointing Game Scores"].mean()]).T
# df_aggregate_results = df_aggregate_results.reindex(sorted(df_aggregate_results.columns), axis=1)

# df_aggregate_results_ = df_aggregate_results 


# df_aggregate_results_.sort_values(by="Pointing Game Scores", inplace=True, ascending=True)

# p_values = []
# itr = df_aggregate_results_.iterrows()
# next(itr)
# counts =  dff.groupby(groupby).count()
# counts = counts.reset_index()

# for (name_current, p_current), (name_next, p_next) in zip(df_aggregate_results_.iterrows(), itr):
#     n=counts[(counts[groupby] == name_current).all(axis=1)].iloc[0]["Pointing Game Scores"]
#     name_current = name_current if len(groupby) >= 2 else [name_current]
#     p_values.append((*name_current, binomtest(int(p_current.values[0] * n), n=n, p=p_next.values[0], alternative="less").pvalue))

# result = df_aggregate_results_.join(pd.DataFrame(p_values, columns=[*groupby, "p-value"]).set_index(groupby)).fillna("").style\
#     .map_index(lambda v: "rotatebox:{45}--rwrap;", level=0, axis=1).format(precision=3).format_index(escape="latex", axis=0)
# latex_output.append(result.to_latex(environment="table", 
#                                     convert_css=True, 
#                                     clines="all;data", 
#                                     hrules=True, 
#                                     caption=caption, 
#                                     label=label))
# result

# Per group but separate tables

In [173]:

# One sub-table per detector: scores per (explainer, detector) plus the random baseline,
# each method compared with the next best one using a one-sided binomial test.
groupby = ["Explainer", "Detector"]

# These aggregates do not depend on the detector, so they are computed once up front.
# Random baseline: mean per (group, document) first, then mean over the documents.
df_random = dff_random.groupby(groupby + ["doc_nr"])["Pointing Game Scores"].mean()
df_aggregate_results = pd.DataFrame([dff.groupby(groupby)["Pointing Game Scores"].mean()]).T
df_aggregate_results = df_aggregate_results.reindex(sorted(df_aggregate_results.columns), axis=1)
df_random_row = pd.DataFrame(df_random.groupby(groupby).mean())
# NOTE(review): the count for the random baseline includes every run, so its n is
# larger than the number of documents - confirm this is the intended sample size.
counts = pd.concat([dff, dff_random]).groupby(groupby).count().reset_index()

latex_output.append(r"""\begin{figure}""")
for detector_class in detector_classes:
    label = "results_pointing_game_detector_"+detector_class.__name__
    caption = detector_class.__name__

    # Keep only the rows of the current detector (index level 1) and sort ascending so
    # that every row is directly followed by the next better method.
    df_aggregate_results_ = pd.concat([
        df_aggregate_results[df_aggregate_results.index.get_level_values(1) == detector_class.__name__],
        df_random_row[df_random_row.index.get_level_values(1) == detector_class.__name__],
    ]).sort_values(by="Pointing Game Scores", ascending=True)

    p_values = []
    itr = df_aggregate_results_.iterrows()
    next(itr)  # shift by one row so that zip() pairs each row with its successor

    for (name_current, p_current), (name_next, p_next) in zip(df_aggregate_results_.iterrows(), itr):
        n = int(counts[(counts[groupby] == name_current).all(axis=1)].iloc[0]["Pointing Game Scores"])
        # mean * n is a float; truncating with int() could lose one success when the
        # product ends up marginally below the integer, so round to the nearest count.
        k = int(round(p_current.values[0] * n))
        name_current = name_current if len(groupby) >= 2 else [name_current]
        p_values.append((*name_current, binomtest(k, n=n, p=p_next.values[0], alternative="less").pvalue))

    # The best method has no successor, hence no p-value; fillna("") leaves that cell empty.
    result = df_aggregate_results_.join(pd.DataFrame(p_values, columns=[*groupby, "p-value"]).set_index(groupby)).fillna("").style\
        .map_index(lambda v: "rotatebox:{45}--rwrap;", level=0, axis=1).format(precision=3).format_index(escape="latex", axis=0)
    latex_output.append(result.to_latex(environment="subfigure",
                                        convert_css=True,
                                        clines="all;data",
                                        hrules=True,
                                        caption=caption,
                                        label=label))
    display(result)
latex_output.append(r"""\caption{Results per detector. P-values from a binominal tests with $H_0$: No difference between this method and the next best in this list.}""")
latex_output.append(r"""\label{results_pointing_game_detector}""")
latex_output.append(r"""\end{figure}""")

Unnamed: 0_level_0,Unnamed: 1_level_0,Pointing Game Scores,p-value
Explainer,Detector,Unnamed: 2_level_1,Unnamed: 3_level_1
LIME\_Explainer,DetectorGuo,0.605,0.168
Random\_Explainer,DetectorGuo,0.635,0.0
Anchor\_Explainer,DetectorGuo,0.681,0.0
SHAP\_Explainer,DetectorGuo,0.812,


Unnamed: 0_level_0,Unnamed: 1_level_0,Pointing Game Scores,p-value
Explainer,Detector,Unnamed: 2_level_1,Unnamed: 3_level_1
LIME\_Explainer,DetectorRadford,0.402,0.004
Random\_Explainer,DetectorRadford,0.484,0.003
Anchor\_Explainer,DetectorRadford,0.492,0.0
SHAP\_Explainer,DetectorRadford,0.631,


Unnamed: 0_level_0,Unnamed: 1_level_0,Pointing Game Scores,p-value
Explainer,Detector,Unnamed: 2_level_1,Unnamed: 3_level_1
Random\_Explainer,DetectorDetectGPT,0.577,0.0
LIME\_Explainer,DetectorDetectGPT,0.631,0.473
SHAP\_Explainer,DetectorDetectGPT,0.635,


In [174]:
def shorten_latex(string):
    """Shorten explainer/detector names and the score column title in a LaTeX snippet.

    Also appends ``{\\columnwidth}`` as the width argument to every
    ``\\begin{subfigure}``.

    Args:
        string: LaTeX source to post-process.

    Returns:
        The string with all replacements applied, in the order listed below.
    """
    replacements = (
        # Index labels are LaTeX-escaped ("LIME\_Explainer"). Strip the escaped suffix
        # first, otherwise a dangling backslash is left behind ("LIME\").
        (r"\_Explainer", ""),
        ("_Explainer", ""),
        ("DetectorRadford", "Radford"),
        ("DetectorDetectGPT", "DetectGPT"),
        ("DetectorGuo", "Guo"),
        ("Pointing Game Scores", "Score"),
        (r"""\begin{subfigure}""", r"""\begin{subfigure}{\columnwidth}"""),
    )
    for old, new in replacements:
        string = string.replace(old, new)
    return string


In [175]:
# Preview the shortened LaTeX snippets before they are written to disk.
list(map(shorten_latex, latex_output))

['\\begin{table}\n\\caption{Results aggregated by explainer. P-values from a binominal tests with $H_0$: No difference between this method and the next best in this list.}\n\\label{results_pointing_game_explainers}\n\\begin{tabular}{lrl}\n\\toprule\n & Score & p-value \\\\\nExplainer &  &  \\\\\n\\midrule\nLIME\\ & 0.546 & 0.143 \\\\\n\\cline{1-3}\nRandom\\ & 0.565 & 0.000 \\\\\n\\cline{1-3}\nAnchor\\ & 0.586 & 0.000 \\\\\n\\cline{1-3}\nSHAP\\ & 0.692 &  \\\\\n\\cline{1-3}\n\\bottomrule\n\\end{tabular}\n\\end{table}\n',
 '\\begin{figure}',
 '{\\begin{subfigure}{\\columnwidth}\n\\caption{Guo}\n\\label{results_pointing_game_detector_Guo}\n\\begin{tabular}{llrl}\n\\toprule\n &  & Score & p-value \\\\\nExplainer & Detector &  &  \\\\\n\\midrule\nLIME\\ & Guo & 0.605 & 0.168 \\\\\n\\cline{1-4} \\cline{2-4}\nRandom\\ & Guo & 0.635 & 0.000 \\\\\n\\cline{1-4} \\cline{2-4}\nAnchor\\ & Guo & 0.681 & 0.000 \\\\\n\\cline{1-4} \\cline{2-4}\nSHAP\\ & Guo & 0.812 &  \\\\\n\\cline{1-4} \\cline{2-4}\n\

In [176]:
# Write all shortened LaTeX snippets into one file, separated by newlines.
# The target directory is created first so that a fresh checkout does not fail in open().
os.makedirs("figures", exist_ok=True)
with open("figures/tables_pointing_game.tex", "w", encoding="UTF-8") as text_file:
    text_file.write("\n".join(shorten_latex(snippet) for snippet in latex_output))