In [1]:
# Debug configuration: when DEBUG is True, the evaluation loop only processes
# the first N_DEBUG hybrid documents instead of all of them.
N_DEBUG = 100
DEBUG = False

In [2]:

# import sklearn
# import sklearn.ensemble
# import sklearn.metrics
# import sklearn.feature_extraction
# from sklearn.pipeline import make_pipeline
import pointing_game_util

In [3]:
import pandas as pd

In [4]:
# Load the pickled test split.
# Always load the FULL dataset here (np.random.shuffle(tokenized_sentences));
# when debugging, slice the generated hybrid_documents instead of this frame.
# NOTE(review): presumably the hybrid-document construction shuffles sentences,
# so truncating its input would change the generated documents - confirm.
# NOTE(review): unpickling can execute arbitrary code; only load dataset files
# that come from a trusted source.
test = pd.read_pickle("./dataset_test.pkl")

# Text of each answer; these are the documents fed into the hybrid construction.
documents = test["answer"]

# Boolean gold label per document: True where the author is "human_answers".
# convention: 0: machine, 1: human, see detector.py
gold_labels = test["author"] == "human_answers"

In [5]:
from gpt2outputdataset.detector_radford import DetectorRadford
from detectgpt.detector_detectgpt import DetectorDetectGPT
from detector_guo import DetectorGuo
from explainer_wrappers import LIME_Explainer, SHAP_Explainer

# Detector classes to evaluate, in evaluation order. Classes (not instances)
# are listed so that each detector is only constructed when its turn comes.
detector_classes = [
    DetectorGuo,
    DetectorRadford,
    DetectorDetectGPT,
]

# Explainer classes to evaluate; each one is instantiated with a detector.
explainer_classes = [
    LIME_Explainer,
    SHAP_Explainer,
]

In [6]:
# Reference hybrid documents, built once up front. The evaluation loop asserts
# that the hybrid documents it builds with each explainer's tokenizer are equal
# to these. Only the first of the three returned values is kept.
ref_assert_hybrid_documents, _, _ = pointing_game_util.hybrid(
    documents.to_list(),
    gold_labels.to_list(),
    word_tokenizer=LIME_Explainer(DetectorRadford()).tokenize,
)


In [7]:
# Sanity check: display how many reference hybrid documents were built.
len(ref_assert_hybrid_documents)

163

In [8]:
# Pointing-game evaluation over every (detector, explainer) combination.
# For each pair: rebuild the hybrid documents with the explainer's tokenizer,
# check them against the reference copy, predict labels with the detector and
# compute the pointing game accuracy of the explainer's explanations.
# One (explainer name, detector name, accuracy) tuple is appended to `results`
# per pair, so pairs that finished remain available if the run is interrupted.
results = []
for detector_class in detector_classes:
    detector = detector_class()
    detector_name = detector.__class__.__name__
    print("Initialized " + detector_name)
    for explainer_class in explainer_classes:
        explainer = explainer_class(detector)
        explainer_name = explainer.__class__.__name__
        print("Initialized " + explainer_name)

        print("Indexing hybrid documents for " + explainer_name)
        # Fresh lists are passed on every call on purpose: whether
        # pointing_game_util.hybrid mutates its inputs is not visible from here.
        hybrid_documents, tokenized_hybrid_documents, GT = pointing_game_util.hybrid(
            documents.to_list(),
            gold_labels.to_list(),
            word_tokenizer=explainer.tokenize,
        )
        # tokenized_hybrid_documents differ by design to make the calculation
        # of the pointing game accuracy easier, so only the (full) hybrid
        # documents are compared. The explicit length check is required because
        # zip() stops at the shorter input and would otherwise let a missing or
        # extra document pass unnoticed.
        assert len(hybrid_documents) == len(ref_assert_hybrid_documents) and all(
            a == b for a, b in zip(ref_assert_hybrid_documents, hybrid_documents)
        ), "(full) Hybrid documents don't match"

        if DEBUG:
            # Truncate only after the full-set check above.
            hybrid_documents = hybrid_documents[0:N_DEBUG]
            tokenized_hybrid_documents = tokenized_hybrid_documents[0:N_DEBUG]
            GT = GT[0:N_DEBUG]

        print("Predicting hybrid documents")
        predictions_hybrid = detector.predict_label(hybrid_documents)

        print("Generating explanations on hybrid documents and calculating pointing game accuracy")
        pointing_game_acc = pointing_game_util.get_pointing_game_acc(
            hybrid_documents, explainer, predictions_hybrid, GT
        )

        print(f"Pointing game accuracy for {explainer_name} | {detector_name}: {pointing_game_acc}")
        results.append((explainer_name, detector_name, pointing_game_acc))

        




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Initialized DetectorGuo
Initialized LIME_Explainer
Indexing hybrid documents for LIME_Explainer
Predicting hybrid documents
Generating explanations on hybrid documents and calculating pointing game accuracy


Generating explanations: 100%|██████████| 163/163 [00:00<00:00, 636.82it/s]


Pointing game accuracy for LIME_Explainer | DetectorGuo: 0.5766871165644172
Initialized SHAP_Explainer
Indexing hybrid documents for SHAP_Explainer
Predicting hybrid documents
Generating explanations on hybrid documents and calculating pointing game accuracy


Generating explanations: 100%|██████████| 163/163 [00:00<00:00, 2663.59it/s]


Pointing game accuracy for SHAP_Explainer | DetectorGuo: 0.8404907975460123
Initialized DetectorRadford
Initialized LIME_Explainer
Indexing hybrid documents for LIME_Explainer
Predicting hybrid documents
Generating explanations on hybrid documents and calculating pointing game accuracy


Generating explanations: 100%|██████████| 163/163 [00:00<00:00, 997.56it/s]


Pointing game accuracy for LIME_Explainer | DetectorRadford: 0.4049079754601227
Initialized SHAP_Explainer
Indexing hybrid documents for SHAP_Explainer
Predicting hybrid documents
Generating explanations on hybrid documents and calculating pointing game accuracy


Generating explanations: 100%|██████████| 163/163 [00:00<00:00, 632.76it/s] 


Pointing game accuracy for SHAP_Explainer | DetectorRadford: 0.6503067484662577
Using cache dir ./.cache
Loading BASE model EleutherAI/pythia-70m...


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading mask filling model t5-small...
DONE (0.09s)
Initialized
Initialized DetectorDetectGPT
Initialized LIME_Explainer
Indexing hybrid documents for LIME_Explainer
Predicting hybrid documents
DONE (0.14s)
DONE (0.09s)
Generating explanations on hybrid documents and calculating pointing game accuracy


Generating explanations:   0%|          | 0/163 [00:00<?, ?it/s]

DONE (0.11s)


Token indices sequence length is longer than the specified maximum sequence length for this model (1127 > 512). Running this sequence through the model will result in indexing errors


DONE (0.30s)


Generating explanations:   1%|          | 1/163 [27:45<74:55:53, 1665.14s/it]

DONE (0.11s)
DONE (0.22s)


Generating explanations:   1%|          | 2/163 [54:36<73:03:10, 1633.48s/it]

DONE (0.13s)
DONE (0.13s)


Generating explanations:   2%|▏         | 3/163 [1:19:19<69:32:14, 1564.59s/it]

DONE (0.09s)
DONE (0.51s)


Generating explanations:   2%|▏         | 4/163 [1:45:35<69:18:29, 1569.24s/it]

DONE (0.11s)
DONE (0.13s)


Generating explanations:   3%|▎         | 5/163 [2:11:14<68:23:13, 1558.19s/it]

DONE (0.09s)
DONE (0.15s)


Generating explanations:   4%|▎         | 6/163 [2:23:10<55:28:31, 1272.05s/it]

DONE (0.09s)
DONE (0.13s)


Generating explanations:   4%|▍         | 7/163 [2:49:08<59:10:17, 1365.50s/it]

DONE (0.09s)
DONE (0.27s)


Generating explanations:   5%|▍         | 8/163 [3:16:54<62:54:23, 1461.05s/it]

DONE (0.11s)
DONE (0.14s)


Generating explanations:   6%|▌         | 9/163 [3:32:52<55:46:38, 1303.89s/it]

DONE (0.09s)
DONE (0.13s)


Generating explanations:   6%|▌         | 10/163 [3:57:44<57:53:10, 1362.03s/it]

DONE (0.09s)
DONE (0.13s)


Generating explanations:   7%|▋         | 11/163 [4:22:45<59:17:58, 1404.46s/it]

DONE (0.09s)
DONE (0.13s)


Generating explanations:   7%|▋         | 12/163 [4:47:26<59:53:23, 1427.84s/it]

DONE (0.09s)
DONE (0.13s)


Generating explanations:   8%|▊         | 13/163 [5:13:37<61:18:12, 1471.28s/it]

DONE (0.09s)
DONE (0.14s)


Generating explanations:   9%|▊         | 14/163 [5:37:17<60:14:42, 1455.58s/it]

DONE (0.09s)
DONE (0.13s)


Generating explanations:   9%|▉         | 15/163 [6:03:01<60:56:02, 1482.18s/it]

DONE (0.09s)
DONE (0.13s)


Generating explanations:  10%|▉         | 16/163 [6:27:38<60:27:57, 1480.80s/it]

DONE (0.08s)
DONE (0.13s)


Generating explanations:  10%|█         | 17/163 [6:53:01<60:33:53, 1493.38s/it]

DONE (0.08s)


In [None]:
# Turn the tuples collected in `results` into a table with one row per
# (explainer, detector) pair and display it.
result_columns = ["Explanation Method", "Detector", "Pointing Game ACC"]
df = pd.DataFrame(data=results, columns=result_columns)
df

Unnamed: 0,Explanation Method,Detector,Pointing Game ACC
0,LIME_Explainer,DetectorGuo,0.576687
1,SHAP_Explainer,DetectorGuo,0.840491
2,LIME_Explainer,DetectorRadford,0.404908
3,SHAP_Explainer,DetectorRadford,0.650307


In [None]:
# Mean pointing game accuracy per explanation method, averaged over the
# detectors present in `df`.
(
    df
    .groupby(["Explanation Method"])["Pointing Game ACC"]
    .mean()
)

Explanation Method
LIME_Explainer    0.490798
SHAP_Explainer    0.745399
Name: Pointing Game ACC, dtype: float64

In [None]:
# import shap

In [None]:

# for hybrid_document in hybrid_documents:

#     explainer = LIME_Explainer(detector)
#     explainer.get_explanation_cached(hybrid_document).show_in_notebook()

#     explainer = SHAP_Explainer(detector)
#     shap.text_plot(explainer.get_explanation_cached(hybrid_document))
        