In [1]:
# Directory where `run` pickles one result file per detector label and from
# which the results table is rebuilt.
CACHE_DIR = "./benchmark_cache"

In [2]:
import pandas as pd
import os
import pickle

from detector_radford import DetectorRadford
from detector_detectgpt import DetectorDetectGPT
from detector_guo import DetectorGuo
# Detector classes that later cells instantiate with default arguments.
detector_classes = [DetectorGuo, DetectorRadford, DetectorDetectGPT]

# Accumulator for benchmark rows; it is reset to an empty list again further down.
results = []


In [3]:
# Load both dataset splits from their pickle files (test first, then train).
test, train = (
    pd.read_pickle(split_path)
    for split_path in ("./dataset_test.pkl", "./dataset_train.pkl")
)

In [4]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
import time

In [5]:
# Create the cache directory if it is missing. exist_ok=True replaces the
# separate existence check, so the cell is safe to re-run and has no
# check-then-create race.
os.makedirs(CACHE_DIR, exist_ok=True)

In [6]:
# Combined number of rows in the train and test splits.
len(train)+len(test)

1016

In [7]:
# Stack the two splits, test rows first and train rows second.
# `documents` holds the answer texts; `gold_labels` is True where the author
# column equals "human_answers".
documents = pd.concat([part["answer"] for part in (test, train)])
gold_labels = pd.concat([part["author"] == "human_answers" for part in (test, train)])

In [8]:
# (label, detector) pairs; later cells reassign this list before benchmarking.
detectors = []
# Rows for the summary DataFrame that later cells build from `results`.
results = []
# Column order shared by every benchmark table; TN, FP, FN, TP follow the
# order of confusion_matrix(...).ravel().
columns = ["Detector", "Acc", "F1", "ROC AUC", "TN", "FP", "FN", "TP", "ms/evaluation"]

In [9]:
# Default-constructed DetectorGuo bound to the notebook-level name `detector`.
detector = DetectorGuo()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [10]:
# Label (cache file name / table row name) for a benchmark run.
# NOTE(review): no visible cell calls run(label, detector); presumably it was
# invoked interactively — confirm.
label = "test"

In [11]:
def run(label, detector):
    """Benchmark ``detector`` on the global ``documents`` and cache the outcome.

    The result is pickled to ``CACHE_DIR/<label>`` as a
    ``(predictions, ms_per_document)`` tuple. If that file already exists the
    function returns immediately without running the detector.

    Args:
        label: File name of the cache entry inside ``CACHE_DIR``.
        detector: Object exposing ``predict_label(documents)``.

    Returns:
        None. The only effect is the cache file written to disk.
    """
    # Local import so timing keeps working even when the notebook-level name
    # `time` has been rebound to something that is not the module.
    import time as _time

    cache_path = os.path.join(CACHE_DIR, label)
    if os.path.isfile(cache_path):
        return

    # perf_counter_ns is monotonic, so clock adjustments during a long run
    # cannot distort (or make negative) the measured duration.
    start = _time.perf_counter_ns()
    predictions = detector.predict_label(documents) # seed is set in detectors by default
    end = _time.perf_counter_ns()

    # Guard against an empty document collection (would divide by zero).
    n_documents = max(len(documents), 1)
    # Average nanoseconds per document, floored to whole milliseconds.
    ms_per_document = ((end - start) / n_documents) // 1000000

    with open(cache_path, 'wb') as f:
        pickle.dump((predictions, ms_per_document), f)


In [12]:
# Rebuild the metric tables from every cached benchmark run.
results_test = []
results_full = []
# `documents` / `gold_labels` list the test split first, so the first
# len(test) entries are the test rows.
n_test = len(test)

# sorted() gives a stable row order; os.listdir returns entries in arbitrary order.
# The loop variable is not called `label`, so the notebook-level `label` is kept.
for cache_name in sorted(os.listdir(CACHE_DIR)):
    # NOTE: pickle.load can execute arbitrary code; only load cache files that
    # this notebook wrote itself.
    with open(os.path.join(CACHE_DIR, cache_name), 'rb') as f:
        # Deliberately not named `time`: that would shadow the `time` module
        # imported at notebook level and break later timing code.
        predictions, ms_per_eval = pickle.load(f)

    # Same metrics twice: once on the test slice, once on all documents.
    for rows, y_true, y_pred in (
        (results_test, gold_labels[0:n_test], predictions[0:n_test]),
        (results_full, gold_labels, predictions),
    ):
        rows.append((cache_name,
                     accuracy_score(y_true, y_pred),
                     f1_score(y_true, y_pred),
                     roc_auc_score(y_true, y_pred),
                     *confusion_matrix(y_true, y_pred).ravel(), # TN, FP, FN, TP
                     ms_per_eval
                     ))

df_test = pd.DataFrame(results_test, columns=columns).set_index("Detector")
df_full = pd.DataFrame(results_full, columns=columns).set_index("Detector")

display(df_test)
display(df_full)

Unnamed: 0_level_0,Acc,F1,ROC AUC,TN,FP,FN,TP,ms/evaluation
Detector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
test,0.990164,0.990033,0.990132,153,0,3,149,18.0


Unnamed: 0_level_0,Acc,F1,ROC AUC,TN,FP,FN,TP,ms/evaluation
Detector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
test,0.996063,0.996047,0.996063,508,0,4,504,18.0


In [None]:
import transformers

# Configure a DetectorDetectGPT instance with "gpt2-xl" as base model,
# "t5-3b" as mask-filling model and 100 perturbations, then benchmark it.
detectGPT_default = DetectorDetectGPT()
detectGPT_default.n_perturbations = 100

detectGPT_default.base_model_name = "gpt2-xl"
detectGPT_default.mask_filling_model_name = "t5-3b"
# Base model and tokenizer come from the detector's own loader helper.
base_model, base_tokenizer = detectGPT_default.load_base_model_and_tokenizer(detectGPT_default.base_model_name)
detectGPT_default.base_model = base_model
detectGPT_default.base_tokenizer = base_tokenizer

# The mask-filling model is loaded directly through transformers, reusing the detector's cache_dir.
mask_model = transformers.AutoModelForSeq2SeqLM.from_pretrained(detectGPT_default.mask_filling_model_name, cache_dir=detectGPT_default.cache_dir)
detectGPT_default.mask_model = mask_model

# The tokenizer's maximum length is taken from the mask model's config (n_positions).
mask_tokenizer = transformers.AutoTokenizer.from_pretrained(detectGPT_default.mask_filling_model_name, model_max_length=mask_model.config.n_positions, cache_dir=detectGPT_default.cache_dir)
detectGPT_default.mask_tokenizer = mask_tokenizer

# NOTE(review): what load_base_model() / load_mask_model() do is not visible
# here — presumably they prepare the models assigned above for inference; confirm.
detectGPT_default.load_base_model()
detectGPT_default.load_mask_model()

# The result is cached under this label; run() returns early if the cache file exists.
run(DetectorDetectGPT.__name__+" @100 GPT-2", detectGPT_default)
# NOTE(review): run() writes to the cache and does not append to `results`, so
# this table only shows whatever `results` already holds.
pd.DataFrame(results, columns=columns).set_index("Detector")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Benchmark a default DetectorDetectGPT with 100 perturbations.
detectGPT_pythia_100 = DetectorDetectGPT()
detectGPT_pythia_100.n_perturbations = 100
detectors = [(DetectorDetectGPT.__name__ +" @100", detectGPT_pythia_100)]
# run() requires (label, detector); calling it without arguments raises
# TypeError, so each registered pair is passed explicitly. Separate loop names
# keep the notebook-level `label` / `detector` variables untouched.
for detector_label, detector_instance in detectors:
    run(detector_label, detector_instance)
pd.DataFrame(results, columns=columns).set_index("Detector")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Benchmark every detector class with its default configuration.
detectors = [(detector_class.__name__, detector_class()) for detector_class in detector_classes]
# run() requires (label, detector); calling it without arguments raises
# TypeError, so each registered pair is passed explicitly.
for detector_label, detector_instance in detectors:
    run(detector_label, detector_instance)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Summary table built from `results`.
# NOTE(review): run() as defined in this notebook never appends to `results`,
# so this frame is empty unless `results` is filled elsewhere — confirm whether
# the cache-backed tables (df_test / df_full) were intended here.
df = pd.DataFrame(results, columns=columns).set_index("Detector")

In [None]:
# Display the summary table.
df

Unnamed: 0_level_0,Acc,F1,ROC AUC,TN,FP,FN,TP,ms/evaluation
Detector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
DetectorDetectGPT @100,0.704918,0.579439,0.703947,153,0,90,62,6087.0
DetectorGuo,0.990164,0.990033,0.990132,153,0,3,149,18.0
DetectorRadford,0.921311,0.922078,0.921354,139,14,10,142,19.0
DetectorDetectGPT,0.744262,0.663793,0.743486,150,3,75,77,833.0


In [None]:
# Export the summary table as a LaTeX table for the write-up.
# open(..., "w") does not create missing parent directories, so make sure the
# output folder exists first.
os.makedirs("figures", exist_ok=True)
# NOTE(review): the caption reports n = len(documents) (test + train rows);
# confirm that `df` holds metrics computed on that same set.
with open("figures/benchmark.tex", "w", encoding="UTF-8") as text_file:
    text_file.write(df.style.to_latex(environment="table",
                                        convert_css=True,
                                        clines="all;data",
                                        hrules=True,
                                        # Caption typo fixed: "where" -> "were".
                                        caption="Performance on the dataset explanations were generated for (balanced, n={})".format(len(documents)),
                                        label="table-benchmark"))

In [None]:
# NOTE(review): undefined name — evaluating it raises NameError. Presumably a
# deliberate barrier that stops "Run All" before the scratch cells below;
# confirm or remove.
t435regfsdxvc

NameError: name 't435regfsdxvc' is not defined

In [None]:
# Number of rows in the test split.
len(test)

305

In [None]:
# Scratch arithmetic; what the factors stand for is not stated in this notebook.
3*3*2*2

36

In [None]:
# Smoke test: classify a single document with a fresh DetectorRadford.
# NOTE(review): `documents[0]` uses plain [] indexing on a concatenated Series,
# which looks up by index label when the index is integer-typed; other cells
# use `.iloc[...]` — confirm which row is intended.
DetectorRadford().predict_label([documents[0]])

array([1], dtype=int64)

In [None]:
from tqdm import tqdm

In [None]:
# for detector_class in detector_classes:
#     detector = detector_class()
#     start = time.time()
#     y = detector.predict_label(documents)
#     end = time.time()

#     acc = sum(y == gold_labels)/ len(documents)
#     results.append((detector.__class__.__name__, acc, end - start))
#     print(results[-1])


In [None]:
# detector = DetectorDetectGPT()

In [None]:
# detector.get_pad_token_id_masker()

In [None]:
import numpy as np

In [None]:
# y = detector.predict_proba(documents, deterministic=True)
# yy = np.array(y).argmax(axis=1)
# acc = sum(yy == gold_labels)/ len(documents)
# acc

In [None]:
# y = detector.predict_proba(documents, deterministic=True)
# yy = np.array(y).argmax(axis=1)
# acc = sum(yy == gold_labels)/ len(documents)
# acc

In [None]:
# Import from the public IPython.display module; IPython.core.display is the
# deprecated location and emits a DeprecationWarning.
from IPython.display import display, HTML

Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display


In [None]:
# for document, gt in zip(documents,gold_labels):
#     explainer = SHAP_Explainer(detector)
#     print(gt)
#     display(HTML(explainer.get_vanilla_visualization_HTML(document)))

In [None]:
# for document in tqdm(documents):
#     detector.predict_proba([document])

In [None]:
import shap

In [None]:
# Pre-compute and cache explanations for every detector / explainer combination.
# NOTE(review): `explainer_classes` is not defined in the visible part of this
# notebook — it must be defined (and the explainer classes imported) before this cell.
for detector_class in detector_classes:
    detector = detector_class()
    for explainer_class in explainer_classes:
        explainer = explainer_class(detector)
        print(explainer.__class__.__name__, detector.__class__.__name__)
        # Only process documents the explainer does not report as cached.
        docs = [document for document in documents if not explainer.is_cached(document)]
        #docs = docs[0:len(docs)//2]
        for document in tqdm(docs):
            explainer.get_explanation_cached(document)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


SHAP_Explainer DetectorGuo


100%|██████████| 711/711 [1:49:26<00:00,  9.24s/it]


LIME_Explainer DetectorGuo


100%|██████████| 711/711 [3:39:30<00:00, 18.52s/it]  


SHAP_Explainer DetectorRadford


100%|██████████| 711/711 [1:52:49<00:00,  9.52s/it]


LIME_Explainer DetectorRadford


100%|██████████| 711/711 [4:06:20<00:00, 20.79s/it]  


In [None]:
# detector = DetectorDetectGPT()


In [None]:
345wter78/2

SyntaxError: invalid decimal literal (4294918467.py, line 1)

In [None]:
# # detector = DetectorRadford()
# explainer = LIME_Explainer(detector)

# explainer.get_fi_scores_batch(documents)

In [None]:
# explainer = LIME_Explainer(detector)

# explainer.get_fi_scores_batch(documents)

In [None]:
# detector=DetectorDetectGPT()

In [None]:
# explainer = Anchor_Explainer(detector)


In [None]:
# Reset the notebook-level explanation holder.
exp = None

In [None]:
# sent = "This is a sentence, machine human."
# sent

In [None]:
# Gold label at position 3 (True where the author column equals "human_answers").
gold_labels.iloc[3]

False

In [None]:
# words, positions, true_label, sample_fn = explainer.explainer.get_sample_fn(sent, detector.predict_label, onepass=True)

In [None]:
# sentences = sample_fn([], 100)
# # ["".join(a) for a in sentences.reshape(sentences.shape[0], -1)]

In [None]:
# sentences = sample_fn([], 100)
# # ["".join(a) for a in sentences.reshape(sentences.shape[0], -1)]

In [None]:
# 5ztrhdgf

In [None]:
import sys

In [None]:
# Hand-written sample input; the next cell overwrites `text` with a dataset document.
text = 'An example is a specific instance or illustration that represents a general idea or concept. It serves to clarify or demonstrate a point, making complex ideas more understandable. For instance, a red apple can be an example of a fruit, showcasing the broader category of fruits through a specific case.'

In [None]:
# Select the document at position `i`, print its gold label
# (True where the author column equals "human_answers") and show its text.
i = 3
text = documents.iloc[i]
print(gold_labels.iloc[i])
text

False


'Institutional investors are organizations that invest on behalf of their clients, such as pension funds, mutual funds, endowments, and insurance companies. These investors typically have a large amount of capital to invest and may be seeking to diversify their portfolios across a variety of asset classes. Institutional investors may also be seeking to generate returns or achieve specific investment goals on behalf of their clients. They may conduct extensive research and analysis to identify investment opportunities and make informed decisions about where to allocate their capital. Institutional investors play a significant role in financial markets and often have a significant impact on the prices of securities and other assets.'

In [None]:
# Gold label of the currently selected document (position `i`).
gold_labels.iloc[i]

False

In [None]:
import random

In [None]:
# Build 100 randomly perturbed copies of `text`: each space-separated token is
# kept when the random draw is below 0.95 and otherwise replaced by the
# detector's pad token.
texts = [
    " ".join(
        detector.get_pad_token() if random.random() >= 0.95 else token
        for token in text.split(" ")
    )
    for _ in range(100)
]

In [None]:
# Per-class output of the current `detector` for the selected text.
detector.predict_proba([text])

array([[1, 0]])

In [None]:
# Predicted label of the current `detector` for the selected text.
detector.predict_label([text])

In [None]:
# # ChatGPT prompt: "What is an example? Answer in 50 words"
# exp = explainer.get_explanation_cached('An example is a specific instance or illustration that represents a general idea or concept. It serves to clarify or demonstrate a point, making complex ideas more understandable. For instance, a red apple can be an example of a fruit, showcasing the broader category of fruits through a specific case.')

In [None]:
from anchor.anchor import anchor_explanation

In [None]:
# anchor_explanation.AnchorExplanation('text', exp, explainer.explainer.as_html).show_in_notebook()

In [None]:
# isinstance(explainer, Anchor_Explainer)

In [None]:
# Collects (computation time, gold label, token count) tuples in the loop below.
computation_times = []

In [None]:
# Fetch the (cached) explanation for every document and record Anchor timings.
# zip() has no length, so tqdm needs an explicit total= to show a progress bar and ETA.
for document, gt in tqdm(zip(documents, gold_labels), total=len(documents)):

    exp = explainer.get_explanation_cached(document)
    if isinstance(explainer, Anchor_Explainer):
        # (time reported by the explanation, gold label, number of space-separated tokens)
        computation_times.append((exp["computation_time"], gt, len(document.split(" "))))
    # anchor_explanation.AnchorExplanation('text', exp, explainer.explainer.as_html).show_in_notebook()
    # NOTE(review): the table is rebuilt and displayed on every iteration, and it
    # rebinds the notebook-level `df` — confirm this is intended rather than a
    # one-off summary after the loop.
    df = pd.DataFrame(computation_times, columns=["time", "gt",  "len"])
    display(df)
    print(df["time"].mean())


In [None]:
# Each entry of `computation_times` is a 3-tuple (time, gold label, token count).
# Listing a fourth "prediction" column raises ValueError as soon as the list is
# non-empty, so the column names match the tuple layout.
pd.DataFrame(computation_times, columns=["time", "gt", "len"])