In [1]:
# Debug switch: when True, the evaluation loop below only processes a slice of the hybrid documents.
DEBUG = False
# Exclusive upper bound of that debug slice (the loop slices with [25:N_DEBUG]).
N_DEBUG = 50

# Directory that receives one CSV per detector/explainer pair.
OUTPUT_DIR = "./pointing_game_datasets/"

In [2]:
from detector_radford import DetectorRadford
from detector_detectgpt import DetectorDetectGPT
from detector_guo import DetectorGuo
# Detector classes under evaluation; the evaluation loop instantiates them in this order.
detector_classes = [
    DetectorGuo,
    DetectorRadford,
    DetectorDetectGPT,
]

from explainer_wrappers import LIME_Explainer, SHAP_Explainer, Anchor_Explainer
# Explainer wrapper classes iterated for each detector, in this order.
explainer_classes = [
    LIME_Explainer,
    SHAP_Explainer,
    Anchor_Explainer,
]

In [3]:
import pointing_game_util

In [4]:
import os
import pandas as pd
import spacy
# spaCy pipeline; within this notebook `nlp` is referenced only by the
# commented-out sentence-count diagnostics.
nlp = spacy.load("en_core_web_lg")
# Add the built-in 'sentencizer' component; add_pipe returns the component,
# which is why the cell echoes a Sentencizer repr.
nlp.add_pipe('sentencizer')

<spacy.pipeline.sentencizer.Sentencizer at 0x163710eec10>

In [5]:
# Always load the full dataset here. Per the original author's note, building the
# hybrid documents involves np.random.shuffle(tokenized_sentences), so when
# debugging slice the resulting hybrid documents rather than this input.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
test = pd.read_pickle("./dataset_test.pkl")

documents = test["answer"]

# Label convention: 0: machine, 1: human (see detector.py).
gold_labels = test["author"].eq("human_answers")

In [6]:
# pd.Series((len(list(nlp(d).sents)) for d in documents)).describe()

In [7]:
# Reference hybrid documents, built once with the LIME/Radford tokenizer. The
# evaluation loop asserts that each explainer-specific rebuild matches them.
ref_assert_hybrid_documents, _, _ = pointing_game_util.hybrid(
    documents.to_list(),
    gold_labels.to_list(),
    word_tokenizer=LIME_Explainer(DetectorRadford()).tokenize,
)


In [8]:
# pd.Series((len(list(nlp(d).sents)) for d in ref_assert_hybrid_documents)).describe()

In [9]:
# Create the output directory, including any missing parents. exist_ok=True
# makes re-runs a no-op and avoids the check-then-create race of
# os.path.exists() followed by os.mkdir().
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [10]:
# Evaluate every detector/explainer pair on the hybrid documents and collect one
# (explainer name, detector name, score) row per pointing-game score.
results = []
for detector_class in detector_classes:
    detector = detector_class()
    detector_name = detector.__class__.__name__
    print("Initialized " + detector_name)

    for explainer_class in explainer_classes:
        # This pair is excluded from the experiment. Check *before* constructing
        # the explainer so nothing is built (and logged as initialized) only to
        # be discarded.
        if explainer_class == Anchor_Explainer and detector_class == DetectorDetectGPT:
            continue

        explainer = explainer_class(detector)
        explainer_name = explainer.__class__.__name__
        print("Initialized " + explainer_name)

        print("Indexing hybrid documents for " + explainer_name)
        hybrid_documents, tokenized_hybrid_documents, GT = pointing_game_util.hybrid(
            documents.to_list(),
            gold_labels.to_list(),
            word_tokenizer=explainer.tokenize,
        )
        # The (untokenized) hybrid documents must be identical for every explainer;
        # only tokenized_hybrid_documents differ by design, to make the calculation
        # of the pointing game accuracy easier. The explicit length comparison is
        # needed because zip() stops at the shorter input and would hide a
        # truncated or extended list.
        assert len(ref_assert_hybrid_documents) == len(hybrid_documents) and all(
            a == b for a, b in zip(ref_assert_hybrid_documents, hybrid_documents)
        ), "(full) Hybrid documents don't match"

        if DEBUG:
            # NOTE(review): the start index 25 is hard-coded, so the debug run
            # covers N_DEBUG - 25 documents; confirm this is intended.
            debug_slice = slice(25, N_DEBUG)
            hybrid_documents = hybrid_documents[debug_slice]
            tokenized_hybrid_documents = tokenized_hybrid_documents[debug_slice]
            GT = GT[debug_slice]

        # write csv (for debug purposes)
        csv_path = os.path.join(OUTPUT_DIR, detector_name + "-" + explainer_name + ".csv")
        pd.DataFrame(
            zip(hybrid_documents, tokenized_hybrid_documents, GT),
            columns=["Hybrid Document", "Tokenized Hybrid Document", "GT"],
        ).to_csv(csv_path, index=False)

        print("Predicting hybrid documents")
        predictions_hybrid = detector.predict_label(hybrid_documents)

        print("Obtaining explanations on hybrid documents and calculating pointing game accuracy")
        pointing_game_scores = pointing_game_util.get_pointing_game_scores(
            hybrid_documents, explainer, predictions_hybrid, GT
        )
        results.extend(
            (explainer_name, detector_name, pointing_game_score)
            for pointing_game_score in pointing_game_scores
        )
    
        




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Initialized DetectorGuo
Initialized LIME_Explainer
Indexing hybrid documents for LIME_Explainer
Predicting hybrid documents
Obtaining explanations on hybrid documents and calculating pointing game accuracy


Generating explanations: 100%|██████████| 271/271 [00:01<00:00, 140.13it/s]


Initialized SHAP_Explainer
Indexing hybrid documents for SHAP_Explainer
Predicting hybrid documents
Obtaining explanations on hybrid documents and calculating pointing game accuracy


Generating explanations: 100%|██████████| 271/271 [00:01<00:00, 146.43it/s]


Initialized Anchor_Explainer
Indexing hybrid documents for Anchor_Explainer
Predicting hybrid documents
Obtaining explanations on hybrid documents and calculating pointing game accuracy


100%|██████████| 271/271 [00:09<00:00, 29.59it/s]


Initialized DetectorRadford
Initialized LIME_Explainer
Indexing hybrid documents for LIME_Explainer
Predicting hybrid documents
Obtaining explanations on hybrid documents and calculating pointing game accuracy


Generating explanations: 100%|██████████| 271/271 [00:01<00:00, 136.68it/s]


Initialized SHAP_Explainer
Indexing hybrid documents for SHAP_Explainer
Predicting hybrid documents
Obtaining explanations on hybrid documents and calculating pointing game accuracy


Generating explanations: 100%|██████████| 271/271 [00:01<00:00, 148.95it/s]


Initialized Anchor_Explainer
Indexing hybrid documents for Anchor_Explainer
Predicting hybrid documents
Obtaining explanations on hybrid documents and calculating pointing game accuracy


100%|██████████| 271/271 [00:08<00:00, 30.96it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Initialized DetectorDetectGPT
Initialized LIME_Explainer
Indexing hybrid documents for LIME_Explainer
Predicting hybrid documents
Obtaining explanations on hybrid documents and calculating pointing game accuracy


Generating explanations: 100%|██████████| 271/271 [00:02<00:00, 123.89it/s]


Initialized SHAP_Explainer
Indexing hybrid documents for SHAP_Explainer
Predicting hybrid documents
Obtaining explanations on hybrid documents and calculating pointing game accuracy


Generating explanations: 100%|██████████| 271/271 [00:01<00:00, 146.20it/s]


Initialized Anchor_Explainer


In [11]:
from scipy.stats.mstats import ttest_1samp

In [12]:
# some formatting functions
def get_p_asterisks(group):
    """Format a group's mean score with a LaTeX significance marker.

    Runs a one-sample t-test of ``group`` against a population mean of 0.5 and
    returns the group mean (two decimals) followed by a textsuperscript marker:
    ``***`` for p <= 0.001, ``**`` for p <= 0.01, ``*`` for p <= 0.05 and
    ``ns`` otherwise.

    Parameters
    ----------
    group : pandas.Series
        Numeric scores of one group (as passed by ``groupby(...).agg``).

    Returns
    -------
    str
        The formatted cell text. A string is always returned: when the test is
        undefined (NaN or masked p-value, e.g. a single observation or a
        constant group equal to 0.5) the result is reported as ``ns`` instead
        of silently returning ``None``.
    """
    val = group.mean()
    _, p = ttest_1samp(group, popmean=0.5)
    # The masked-array t-test may hand back a 0-d array or a masked constant;
    # float() turns either into a plain float (masked becomes NaN) so the
    # comparisons below behave predictably.
    p = float(p)
    for threshold, marker in ((0.001, "***"), (0.01, "**"), (0.05, "*")):
        if p <= threshold:
            return "{:.2f}\\textsuperscript{{{}}}".format(val, marker)
    # p > 0.05, or p is NaN (every comparison above is False for NaN).
    return "{:.2f}\\textsuperscript{{ns}}".format(val)

def highlight_max(col):
    """Return per-cell CSS that bolds the largest numeric value of a column.

    Each cell is expected to be a string that starts with (or contains) a
    decimal number, e.g. the output of ``get_p_asterisks``; the first such
    number is extracted and compared.

    Parameters
    ----------
    col : pandas.Series
        Column of formatted strings (as passed by ``Styler.apply``).

    Returns
    -------
    list of str
        ``"font-weight: bold;"`` for every cell whose number equals the column
        maximum, ``""`` for all other cells. Cells without a parsable number
        are never highlighted and no longer prevent the remaining cells from
        being highlighted; an empty column yields an empty list.
    """
    numbers = col.str.extract(r"(-*\d*\.\d*)", expand=False).astype(float)
    # Series.max skips NaN, whereas ndarray.max would return NaN as soon as a
    # single cell failed to parse and thereby disable all highlighting.
    max_val = numbers.max()
    return ["font-weight: bold;" if number == max_val else "" for number in numbers]
def df_to_latex(styled_df, caption="TODO", label="TODO"):
    """Render a styled table as LaTeX by delegating to ``styled_df.to_latex``.

    Parameters
    ----------
    styled_df : object with a ``to_latex`` method (a pandas Styler in this notebook)
        The styled table to export.
    caption : str
        Caption forwarded to ``to_latex``.
    label : str
        Label forwarded to ``to_latex``.

    Returns
    -------
    Whatever ``styled_df.to_latex`` returns (the LaTeX source for a Styler).
    """
    latex_options = dict(
        environment="table",
        convert_css=True,
        clines="all;data",
        hrules=True,
        caption=caption,
        label=label,
    )
    return styled_df.to_latex(**latex_options)

In [13]:
def style_dff(dff, groupby):
    """Aggregate pointing-game scores per group and return a styled table.

    Parameters
    ----------
    dff : pandas.DataFrame
        Must contain the columns "Explainer" and "Pointing Game Scores" plus
        every column named in ``groupby``.
    groupby : list of str
        Column names to group by.

    Returns
    -------
    pandas Styler
        One row per group; the "Pointing Game Scores" cell is the text produced
        by ``get_p_asterisks`` and the maximum of the column is bolded via
        ``highlight_max``.

    Notes
    -----
    The input frame is not modified: the "_Explainer" suffix is stripped on a
    derived frame, so calling this function repeatedly on the same ``dff`` is
    side-effect free.
    """
    # regex=False: "_Explainer" is a literal suffix, not a pattern.
    short_names = dff["Explainer"].str.replace("_Explainer", "", regex=False)
    scores = dff.assign(Explainer=short_names)
    p_results = scores.groupby(groupby).agg(
        {
            "Pointing Game Scores": get_p_asterisks,
        }
    )
    return p_results.style.apply(highlight_max, subset=p_results.columns)


In [14]:
# One row per (explainer, detector, score) tuple collected in `results`.
dff = pd.DataFrame(
    results,
    columns=["Explainer", "Detector", "Pointing Game Scores"],
)
dff

Unnamed: 0,Explainer,Detector,Pointing Game Scores
0,LIME_Explainer,DetectorGuo,0.0
1,LIME_Explainer,DetectorGuo,0.0
2,LIME_Explainer,DetectorGuo,0.0
3,LIME_Explainer,DetectorGuo,0.0
4,LIME_Explainer,DetectorGuo,1.0
...,...,...,...
2163,SHAP_Explainer,DetectorDetectGPT,0.0
2164,SHAP_Explainer,DetectorDetectGPT,0.0
2165,SHAP_Explainer,DetectorDetectGPT,0.0
2166,SHAP_Explainer,DetectorDetectGPT,0.0


In [15]:
# Scores grouped by explainer only, i.e. pooled over all detectors.
p_results_aggregate_level = style_dff(dff, groupby=["Explainer"])
display(p_results_aggregate_level)

Unnamed: 0_level_0,Pointing Game Scores
Explainer,Unnamed: 1_level_1
Anchor,0.59\textsuperscript{***}
LIME,0.55\textsuperscript{**}
SHAP,0.69\textsuperscript{***}


In [16]:
# Scores grouped by explainer and detector: one row per evaluated pair.
p_results = style_dff(dff, groupby=["Explainer", "Detector"])
display(p_results)

Unnamed: 0_level_0,Unnamed: 1_level_0,Pointing Game Scores
Explainer,Detector,Unnamed: 2_level_1
Anchor,DetectorGuo,0.68\textsuperscript{***}
Anchor,DetectorRadford,0.49\textsuperscript{ns}
LIME,DetectorDetectGPT,0.63\textsuperscript{***}
LIME,DetectorGuo,0.61\textsuperscript{***}
LIME,DetectorRadford,0.40\textsuperscript{**}
SHAP,DetectorDetectGPT,0.63\textsuperscript{***}
SHAP,DetectorGuo,0.81\textsuperscript{***}
SHAP,DetectorRadford,0.63\textsuperscript{***}


In [17]:
# Emit the LaTeX source of both tables: per explainer/detector pair first,
# then the per-explainer aggregate.
print(
    df_to_latex(
        p_results,
        label="pointing-game-explainer-detector",
        caption="Scores per detector and explainer",
    )
)
print(
    df_to_latex(
        p_results_aggregate_level,
        label="pointing-game-explainer",
        caption="Scores per explainer",
    )
)

\begin{table}
\caption{Scores per detector and explainer}
\label{pointing-game-explainer-detector}
\begin{tabular}{lll}
\toprule
 &  & Pointing Game Scores \\
Explainer & Detector &  \\
\midrule
\multirow[c]{2}{*}{Anchor} & DetectorGuo & 0.68\textsuperscript{***} \\
\cline{2-3}
 & DetectorRadford & 0.49\textsuperscript{ns} \\
\cline{1-3} \cline{2-3}
\multirow[c]{3}{*}{LIME} & DetectorDetectGPT & 0.63\textsuperscript{***} \\
\cline{2-3}
 & DetectorGuo & 0.61\textsuperscript{***} \\
\cline{2-3}
 & DetectorRadford & 0.40\textsuperscript{**} \\
\cline{1-3} \cline{2-3}
\multirow[c]{3}{*}{SHAP} & DetectorDetectGPT & 0.63\textsuperscript{***} \\
\cline{2-3}
 & DetectorGuo & \bfseries 0.81\textsuperscript{***} \\
\cline{2-3}
 & DetectorRadford & 0.63\textsuperscript{***} \\
\cline{1-3} \cline{2-3}
\bottomrule
\end{tabular}
\end{table}

\begin{table}
\caption{Scores per explainer}
\label{pointing-game-explainer}
\begin{tabular}{ll}
\toprule
 & Pointing Game Scores \\
Explainer &  \\
\midrule
An

In [18]:

# for hybrid_document in hybrid_documents:

#     explainer = LIME_Explainer(detector)
#     explainer.get_explanation_cached(hybrid_document).show_in_notebook()

#     explainer = SHAP_Explainer(detector)
#     shap.text_plot(explainer.get_explanation_cached(hybrid_document))
        