In [1]:
USE_SAVED_DF = False # set to false to overwrite existing dataframe
SAVE_PATH = "./dataset_user_study.csv"
N_PER_GROUP_AND_CLASS = 3

In [2]:
from detector_radford import DetectorRadford
from detector_detectgpt import DetectorDetectGPT
from detector_guo import DetectorGuo
from detector_dummy import DetectorDummy
from explainer_wrappers import LIME_Explainer, SHAP_Explainer, Anchor_Explainer

In [3]:
import pandas as pd
import os
import numpy as np
from tqdm import tqdm
from IPython.core.display import HTML
import lime
import numpy as np
import warnings
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from sklearn.preprocessing import normalize
from itertools import combinations
import torch
import random
from scipy.stats.mstats import ttest_rel
from scipy.stats.mstats import ttest_ind
from scipy.stats.mstats import ttest_1samp

# Search Strategy for Feature Importance Explainers

In [4]:
# returns a matrix of explanations for all documents in "data"
# This function was once SubmodularPick.__init__() in LIME. It was planned to use its output for a search strategy for similar explanations. 
# Only the code for creating W from the paper (rows are explanations, cols are BOW features) remains
# This also now treats positive and negative FI scores as their own features to ease search
def get_explanation_matrix_W(data, explainer, quiet=False):
    # Get (cached) explanations 
    explanations_and_documents = [(d, explainer.get_fi_scores(d, fill=True)[0]) for d in tqdm(data, desc="Loading all explanations",disable=quiet) ] # [0]: only irt to label machine, fill: return all words, even those with 0 fi

    get_feature_name_signed = lambda feature,value : feature + ("_+" if value >=0 else "_-") # appends "_+" or "_-" to each feature name, e.g. "example" -> "example_+" if fi(example) > 0
    # Ribeiro et al.: Find all the explanation model features used. Defines the dimension d'
    # i.e. determine columns of W: each word (BOW) gets (up to) two columns, one for positive FI scores, one for negative FI scores
    features_dict = {}
    feature_iter = 0
    for d, exp in tqdm(explanations_and_documents, desc="Building global dict of features", disable=quiet):
     #   print("exp",exp)
        for feature_idx, value in exp: # irt to label machine
            feature = explainer.tokenize(d)[feature_idx]
            feature_name = get_feature_name_signed(feature,value) # get_feature_name_signed: see above
            if feature_name not in features_dict.keys():
                features_dict[feature_name] = (feature_iter)
                feature_iter += 1
    d_prime = len(features_dict.keys())

    # Ribeiro et al.: Create the n x d' dimensional 'explanation matrix', W
    W = np.zeros((len(explanations_and_documents), d_prime))

    # fill W, look up cols in dict that was just created
    # W: one row per explanation, one col per feature in feature_dict
    for i, (d, exp) in enumerate(tqdm(explanations_and_documents,  desc="Building W",disable=quiet)):
        for feature_idx, value in exp: # irt to label machine
            # get_feature_name_signed: see above
            feature = explainer.tokenize(d)[feature_idx]
            W[i, features_dict[get_feature_name_signed(feature,value)]] += value
    return W, features_dict


In [5]:
# returns a list of tuples: (pair of documents whose explanations are similar, the features that overlap, fi scores of said features)
# this maximizes similarity between documents (greedy, each document is only part of one tuple)
# Another function will select n tuples to maximize coverage in explanation space akin to SP-LIME later on
sum_two_max = None
def get_pairs(documents, W, detector, features_dict, n_pairs=None):
    if n_pairs is None:
        n_pairs = len(documents)//2
    idx_pairs = [] # tuples of indices of similar documents a,b in "data"
    features = [] # list of features those documents covered
    fi_scores_pairs = []

    W_ = np.copy(W)

    document_indices = np.arange(0, W_.shape[0])
    for _ in tqdm(range(0,n_pairs), desc="Obtaining pairs"):
        sim = cosine_similarity(W_) # calculate cosine similarity between all explanations
        sim = np.triu(sim,k=1)  # remove redundant information for argmax()

        idx_max = np.unravel_index(sim.argmax(), sim.shape) # get most similar pair, result is (idx_a, idx_b)
        features_non_zero_in_both = np.intersect1d(W_[idx_max[0]].nonzero(),W_[idx_max[1]].nonzero()) # get features that have non-zero fi in both explanations
                                                                                                      # is used later for selecting a set of tuples with high coverage (as in SP-LIME)
        non_zero_features = [] # list with features that will be returned
        non_zero_fi_scores_tuples = [] # list of tuples with fi scores in a and b that will be returned
    
        # look up feature_idxs in features_dict and append them to the output
        for iii in features_non_zero_in_both:
           key = list(features_dict.keys())[list(features_dict.values()).index(iii)]
           non_zero_features.append(key)
           non_zero_fi_scores_tuples.append((W_[idx_max[0],features_dict[key]], W_[idx_max[1],features_dict[key]]))
        
        # Only add pair to output list if valid: 
        # -> at least one common feature is not zero 
        #            AND 
        # -> f(a) == f(b) (i.e., the explanation is arguing for the same detector verdict)
        if len(non_zero_features) > 0:
            a,b = detector.predict_label([documents[document_indices[idx_max[0]]], documents[document_indices[idx_max[1]]]])
            if a == b:
                idx_pairs.append(document_indices[list(idx_max)])
                fi_scores_pairs.append(non_zero_fi_scores_tuples)
                features.append(non_zero_features)
        # delete pair from W_:
        W_ = np.delete(W_, idx_max, axis=0) 
        document_indices = np.delete(document_indices, list(idx_max))

    return idx_pairs, features, fi_scores_pairs


In [6]:
# Have pairs of similar explanations now. But want a set of pairs that maximizes coverage in explanation space akin to the SP-LIME strategy but for pairs 
# This is the maximum coverage problem: e.g., R. Church and C. ReVelle, 1974 http://yalma.fime.uanl.mx/~roger/work/teaching/class_tso/docs_project/problems/MCLP/1974-prs-Church%20ReVelle-maximal%20covering%20location.pdf
# --> But: if not limited to the top k most similar pairs, a maximum coverage algorithm will select the least similar pairs

In [7]:
# want a set of pairs that maximizes coverage in explanation space akin to the SP-LIME strategy but for pairs 
# this is the maximum coverage problem: 
# implementing a greedy algorithm here: R. Church and C. ReVelle, 1974 http://yalma.fime.uanl.mx/~roger/work/teaching/class_tso/docs_project/problems/MCLP/1974-prs-Church%20ReVelle-maximal%20covering%20location.pdf
#   "In order to achieve a maximal cover for p facilities under a given service distance, 
#   the algorithm starts with an empty solution set and then adds to this set one at a 
#   time the best facility sites. The GA algorithm picks for the first facility that 
#   site which covers the most of the total population. For the second facility, GA 
#   picks the site that covers the most of the population not covered by the first 
#   facility. Then, for the third facility, GA picks the site that covers the most of the 
#   population not covered by the first and second facilities. This process is continued until either p facilities have been selected or all the population is covered. 
#   Details of the algorithm are given in Church." (R. Church and C. ReVelle, 1974, p. 105f)


# let the coverage be the number of columns != 0 in W. And coverage((a,b))) := coverage(sum([a,b])), akin to the importance vector in SP-LIME (Note that columns in W are either negative FI or positive FI here (see get_feature_name_signed()), so scores don't cancel out in sum)

def coverage(selection, W):
    return np.count_nonzero(np.sum(W[np.array(list(selection)).flatten()], axis=0)) # coverage(selection)= number of cols in W that have at least one non-zero entry under this selection of pairs. Recall that W has (up to) two entries per word: one for positive and one for negative FI

def get_site_with_max_coverage(sites, previous_selections, W):
    best_site = None
    best_coverage = 0
    for site in sites:
        candidate = set(np.array(previous_selections).flatten()).union(site) # extend the previous selection by "site", this addresses "For the second facility, GA picks the site that covers the most of the population not covered by the first "
        cov = coverage(candidate, W) # compute new coverage
        if cov >= best_coverage:
            best_coverage = cov
            best_site = site
    return best_site, best_coverage

def get_p_tuples_with_high_coverage(indices, W, p):
  sites = list(indices)
  # "the algorithm start with emty solution set" (R. Church and C. ReVelle, 1974, p. 105f)
  result = list()
  # "and then adds to this set one at a time the best facility sites" (R. Church and C. ReVelle, 1974, p. 105f)
  while True:
      # "The GA algorithm picks for the first facility that 
      # site which covers the most of the total population" (R. Church and C. ReVelle, 1974, p. 105f)
      best_site, best_coverage = get_site_with_max_coverage(sites, result, W)
      result.append(best_site)
      # "This process is continued until either p facilities have been selected or all the population is covered." (R. Church and C. ReVelle, 1974, p. 105f)
      if len(result) == p or best_coverage == W.shape[1]:
          break
  return result


In [8]:
# returns two pairs, one for f(x) = machine and one for f(x) = human
# checks texts_already_selected and chooses the next best pair (for each class) if a document is in texts_already_selected (i.e. it was already selected for another explainer-detector pair)
def obtain_dataset_FI_methods(explainer, detector, documents, gold_labels, document_ids, texts_already_selected):
    W, features_dict = get_explanation_matrix_W(documents, explainer)
    similar_pairs, _, _ = get_pairs(documents, W, detector, features_dict)
    # want a dataset that is balanced irt to the two base classes:
    # two pairs will be returned, one with f(x) == machine, and one with f(x) == human
    top_10_pairs_human = []
    top_10_pairs_machine = []
    for pair in similar_pairs:
        if (documents[pair[0]] in texts_already_selected) or (documents[pair[1]] in texts_already_selected):
            continue
        if detector.predict_label([documents[pair[0]]])[0]:
            top_10_pairs_human.append(pair)
        else:
            top_10_pairs_machine.append(pair)
        if len(top_10_pairs_human) >= 10 and len(top_10_pairs_machine) >= 10:
            top_10_pairs_human = top_10_pairs_human[0:10]
            top_10_pairs_machine = top_10_pairs_machine[0:10]
            break
    pairs_human = get_p_tuples_with_high_coverage(top_10_pairs_human, W, p=N_PER_GROUP_AND_CLASS)
    assert len(pairs_human) == N_PER_GROUP_AND_CLASS
    pairs_machine = get_p_tuples_with_high_coverage(top_10_pairs_machine, W, p=N_PER_GROUP_AND_CLASS)
    assert len(pairs_machine) == N_PER_GROUP_AND_CLASS
    return pairs_human + pairs_machine    



# Search Strategy for Rule-Based Explainers

In [9]:
from anchor.anchor import anchor_explanation
from collections import defaultdict
from itertools import combinations, chain

In [10]:
def jaccard_similarity(document_a, document_b):
    # case sensitive, on spacy tokens
    a = list(chain(*[[token.text for token in sent] for sent in nlp(document_a).sents]))
    b = list(chain(*[[token.text for token in sent] for sent in nlp(document_b).sents]))
    intersection = float(len(list(set(a).intersection(b))))
    union = float((len(set(a)) + len(set(b)))) - intersection
    return intersection / union

In [11]:
# encodes the order of occurence in a list of words, e.g.:
# ["example", "test", "example", "one"] -> ['example_0', 'test_0', 'example_1', 'one_0']
def encode_count(list_of_words):
    d = defaultdict(lambda : 0)
    encoded = []
    for word in list_of_words:
        encoded.append(word + "_" + str(d[word]))
        d[word] +=1
    return encoded

In [12]:
# the dictonary Anchors returns can define multiple anchors:
# {this, is, an, example} : 0.9
# {this, is, an}: 0.8
# {this, is, }: 0.75
# {this}: 0.4
# This function returns all of them, note that sets with theta < threshold are technically no longer anchors by the definition
def get_anchors_at_each_k(documents, explainer, quiet=False):
    anchors = []
    p = []
    ids = []
    for i,_ in tqdm(enumerate(documents), desc="Loading all explanations", disable=quiet):
        exp = explainer.get_explanation_cached(documents[i])
        exp["names"] = encode_count(exp["names"]) # Anchors is not BOW. But the algorithm is written with python set()s
        while len(exp["mean"]) >=1:#and exp["mean"][-1] >= 0.75:
            anchors.append(set(exp["names"])) 
            p.append(exp["mean"][-1])
            ids.append(i)

            exp["mean"].pop()
            exp["names"].pop()
    return anchors, p, ids


In [13]:
# searches for pairs of anchors
# returns pairs of documents, the same number for f(x) = machine and f(x) = human, both sampled randomly
# prints a warning if there are not enough for either class and returns additional samples from the other class if so
# checks for and skips documents in "texts_already_selected" (i.e. it was already selected for this detector dataset)
def obtain_dataset_Anchor(explainer, detector, documents, gold_labels, document_ids, texts_already_selected):
    anchors, p, ids = get_anchors_at_each_k(documents, explainer)
    # find anchors that occur more than once in the dataset, then remove duplicates (created by looping) with set()
    duplicate_anchors = [set(anchor) for anchor in set([frozenset(anchor) for anchor in anchors if anchors.count(anchor) > 1])]
    # get the ids and p for each duplicate_anchor in  duplicate_anchors
    # "candidates" is a list of lists with ids (and all other details) of each duplicate_anchor
    candidates = [[(anchor, p, document_id) for anchor, p, document_id in zip(anchors, p, ids) if anchor == duplicate_anchor] for duplicate_anchor in duplicate_anchors ]
    # now check for each paring of the documents in each sublist of "candidates":
    #   is f(a) == f(b)?, if not: discard
    # then pick pair with highest jaccard_score on the original documents (NOT ANCHORS!) in each "candidate"
    pairs = []

    predictions_cache = {}
    def cached_predict(idx):
        if idx not in predictions_cache:
            predictions_cache[idx] = detector.predict_label([documents[idx]])[0]
        return predictions_cache[idx]
    for candidate in tqdm(candidates, desc="Assessing candidates",position=1):
        
        anchor_s, p, ids  = zip(*candidate)
        c = list(combinations(ids, 2))
        c = [(a,b) for a,b in c if cached_predict(a) == cached_predict(b) if not (documents[a] in texts_already_selected) or (documents[b] in texts_already_selected)]
        if len(c) == 0:
            continue
        jaccard_scores = [(a,b, jaccard_similarity(documents[a], documents[b])) for a,b in tqdm(c, desc="Calculating Jaccard Similarity (of documents not Anchors)",position=0)]
        a,b, score = max(jaccard_scores, key=lambda x: x[2])
        pairs.append((a,b))

    # sample twice: once for f(x) == human and once for f(x) == machine. f(a) == f(b) is tested earlier

    predictions = [cached_predict(a) for a,_ in pairs] # wheter a == b was tested before

    predictions_ = np.array(predictions)
    pairs_ = np.array(pairs)

    machine = pairs_[predictions_ == False]
 
    human = pairs_[predictions_ == True]
   
    
    np.random.seed(42)
    np.random.shuffle(machine)
    np.random.shuffle(human)
    result = []
    # one explainer (DetectGPT) has not enough explanations for f(x) = human: return random documents for f(x) = human to maintain the class balance
    if human.shape[0] < N_PER_GROUP_AND_CLASS:
        print("Warning: Not enough examples for f(x) = human. Returning {} additional random pairs".format(N_PER_GROUP_AND_CLASS - human.shape[0]))
        documents_not_selected = [documents.index(document) for document in documents 
                                  if  (document not in texts_already_selected) 
                                  and (document not in (list(sum([(documents[a],documents[b]) for a,b in pairs], ())))) 
                                  and cached_predict(documents.index(document)) == 1]
        choice = np.random.choice(documents_not_selected, (2*(N_PER_GROUP_AND_CLASS - human.shape[0])), replace=False)
        it = iter(choice)
        result = list(human) + list(zip(it,it))
        
    else:
        result =  list(human[0:N_PER_GROUP_AND_CLASS])
    assert len(result) == N_PER_GROUP_AND_CLASS
    if machine.shape[0] < N_PER_GROUP_AND_CLASS:
        print("Warning: Not enough examples for f(x) = machine. Returning {} additional random pairs".format(N_PER_GROUP_AND_CLASS - machine.shape[0]))
        documents_not_selected = [documents.index(document) for document in documents 
                            if  (document not in texts_already_selected) 
                            and (document not in (list(sum([(documents[a],documents[b]) for a,b in pairs], ())))) 
                            and cached_predict(documents.index(document)) == 0]
        choice = np.random.choice(documents_not_selected, (2*(N_PER_GROUP_AND_CLASS - machine.shape[0])), replace=False)
        it = iter(choice)
        result = result + list(machine) + list(zip(it,it))
    else:
        result +=  list(machine[0:N_PER_GROUP_AND_CLASS])
    assert len(result) == (2*N_PER_GROUP_AND_CLASS)
    return result

    


In [14]:
# wrapper
def obtain_dataset(explainer, detector, documents, gold_labels, document_ids, texts_already_selected):
    if isinstance(explainer, Anchor_Explainer):
        return obtain_dataset_Anchor(explainer, detector, documents, gold_labels, document_ids, texts_already_selected)
    else:
        return obtain_dataset_FI_methods(explainer, detector, documents, gold_labels, document_ids, texts_already_selected)

# Perform Document Selection

In [15]:
test = pd.read_pickle("./dataset_test.pkl")
test = test 

documents = list(test["answer"])
gold_labels = list(test["author"] == "human_answers") # convention: 0: machine, 1: human, see detector.py
document_ids = list(range(0,len(documents))) # note that the search algorithms don't use these ids. They are only used for printing and the exclude_list!!

In [16]:
import spacy
nlp = spacy.load("en_core_web_lg") # used to calculate metrics
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer().fit(documents)

Some documents are excluded from the user-study for the reasons specified below:

In [17]:
# ids as in the "idx a" and "idx b" columns in the final dataframe
# to exclude a document, add the index here after generation and re-run this notebook
exclude_list = {
    (288,117, 188, 110, 159, 97, 105, 115,266, 158, 4,263,195, 211,167): "Contains personal information/Author introduces themselves by name",
    (190,294,16,): "Names (other) forum user",
    (27,103,): "NSFW",
    
    
}
exclude_list = [x for xs in [ list(key) for key in exclude_list.keys()] for x in xs]

In [18]:
# apply exclude_list
documents = [d for i,d in zip(document_ids, documents) if i not in exclude_list]
gold_labels = [gl for i,gl in zip(document_ids, gold_labels) if i not in exclude_list]
document_ids = [i for i in document_ids if i not in exclude_list]

In [19]:
columns = ["Detector", "Explainer", "Documents Phases 1+3", "Documents Phases 2+4", "f(a)", "f(b)", "GT a", "GT b", "idx a", "idx b", "Spacy Similarity", "Jaccard Similarity", "Cosine Similarity tfidf","hash a", "hash b"]

In [20]:
# adds row to list later to be converted to a df
def update_selection(selection, pairs, explainer, detector):
    for a,b in pairs:
        tfidf_= tfidf.transform([documents[a], documents[b]])   
        selection.append((detector.__class__.__name__,
                        explainer.__class__.__name__,
                        documents[a], documents[b],
                        *detector.predict_label([documents[a], documents[b]]),
                        gold_labels[a],
                        gold_labels[b],
                        document_ids[a],
                        document_ids[b],
                        nlp(documents[a]).similarity(nlp(documents[b])),
                        jaccard_similarity(documents[a], documents[b]),
                        (tfidf_ * tfidf_.T).toarray()[0,1],
                        explainer.get_hash(documents[a]),
                        explainer.get_hash(documents[b])))
    return selection

In [21]:
if not USE_SAVED_DF:
    selection = []
    for detector_class in [DetectorDetectGPT,DetectorRadford,DetectorGuo]:
        selection_detector = []
        detector = detector_class()
        display(HTML("<h1>{}</h1>".format(detector.__class__.__name__)))
        for explainer_class in [Anchor_Explainer, LIME_Explainer,SHAP_Explainer]:
            explainer = explainer_class(detector)
            display(HTML("<h2>{}</h2>".format(explainer.__class__.__name__)))
            texts_already_selected = []
            if len(selection_detector) > 0:
                texts_already_selected = list(zip(*selection_detector))[2] + list(zip(*selection_detector))[3]
            pairs = obtain_dataset(explainer, detector, documents, gold_labels, document_ids, texts_already_selected=texts_already_selected)
            selection_detector = update_selection(selection_detector, pairs, explainer, detector)
        selection = selection + selection_detector
            
    

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading all explanations: 285it [00:01, 213.87it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 13.98it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 10/10 [00:00<00:00, 23.25it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 26.32it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 18.01it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 21/21 [00:00<00:00, 23.04it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 22.46it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 10/10 [00:00<00:00, 21.95it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 10/10 [00:00<00:00, 20.06it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 3/3 [00:00<00:00, 23.80i



Loading all explanations: 100%|██████████| 285/285 [00:00<00:00, 289.84it/s]
Building global dict of features: 100%|██████████| 285/285 [00:01<00:00, 147.59it/s]
Building W: 100%|██████████| 285/285 [00:01<00:00, 148.61it/s]
Obtaining pairs: 100%|██████████| 142/142 [04:17<00:00,  1.81s/it]


Loading all explanations: 100%|██████████| 285/285 [00:00<00:00, 402.79it/s]
Building global dict of features: 100%|██████████| 285/285 [00:19<00:00, 14.38it/s]
Building W: 100%|██████████| 285/285 [00:20<00:00, 13.93it/s]
Obtaining pairs: 100%|██████████| 142/142 [04:12<00:00,  1.78s/it]


Loading all explanations: 285it [00:01, 255.87it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00,  9.05it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 3/3 [00:00<00:00, 19.42it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 6/6 [00:00<00:00, 21.62it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 19.61it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 26.32it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 7/7 [00:00<00:00, 20.70it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 18.87it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 11/11 [00:00<00:00, 20.61it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 24.08it/s]
C



Loading all explanations: 100%|██████████| 285/285 [00:00<00:00, 349.61it/s]
Building global dict of features: 100%|██████████| 285/285 [00:01<00:00, 151.13it/s]
Building W: 100%|██████████| 285/285 [00:01<00:00, 149.68it/s]
Obtaining pairs: 100%|██████████| 142/142 [00:07<00:00, 17.95it/s]


Loading all explanations: 100%|██████████| 285/285 [00:00<00:00, 464.14it/s]
Building global dict of features: 100%|██████████| 285/285 [00:19<00:00, 14.58it/s]
Building W: 100%|██████████| 285/285 [00:19<00:00, 14.32it/s]
Obtaining pairs: 100%|██████████| 142/142 [00:08<00:00, 16.42it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading all explanations: 285it [00:01, 196.05it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 22.22it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 20.41it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 22.72it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 28.57it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 6/6 [00:00<00:00, 21.09it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 3/3 [00:00<00:00, 19.23it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 18.34it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 3/3 [00:00<00:00, 20.27it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 22.47it/s]
Cal

Loading all explanations: 100%|██████████| 285/285 [00:00<00:00, 373.43it/s]
Building global dict of features: 100%|██████████| 285/285 [00:01<00:00, 152.57it/s]
Building W: 100%|██████████| 285/285 [00:01<00:00, 151.50it/s]
Obtaining pairs: 100%|██████████| 142/142 [00:07<00:00, 18.02it/s]


Loading all explanations: 100%|██████████| 285/285 [00:00<00:00, 498.96it/s]
Building global dict of features: 100%|██████████| 285/285 [00:19<00:00, 14.38it/s]
Building W: 100%|██████████| 285/285 [00:19<00:00, 14.60it/s]
Obtaining pairs: 100%|██████████| 142/142 [00:08<00:00, 16.16it/s]


In [22]:
if not USE_SAVED_DF:
    df = pd.DataFrame(selection, columns=columns)
    df.to_csv(SAVE_PATH, encoding="utf8") # file in .gitignore
df = pd.read_csv(SAVE_PATH)

In [23]:
assert all(df.groupby(["Explainer", "Detector", "f(a)"]).count() == 3)

In [24]:
df.groupby(["Detector", "Explainer"])[["Documents Phases 1+3", "Documents Phases 2+4"]].apply(lambda group: (group.stack()[group.stack().duplicated(keep=False)]))

Detector,Explainer
DetectorDetectGPT,Anchor_Explainer
DetectorDetectGPT,LIME_Explainer
DetectorDetectGPT,SHAP_Explainer
DetectorGuo,Anchor_Explainer
DetectorGuo,LIME_Explainer
DetectorGuo,SHAP_Explainer
DetectorRadford,Anchor_Explainer
DetectorRadford,LIME_Explainer
DetectorRadford,SHAP_Explainer


In [25]:
assert df.groupby(["Detector", "Explainer"])[["Documents Phases 1+3", "Documents Phases 2+4"]].apply(lambda group: len(group.stack()[group.stack().duplicated(keep=False)])).sum() == 0, "Duplicate documents!"

In [26]:
assert df.groupby(["Detector"])[["Documents Phases 1+3", "Documents Phases 2+4"]].apply(lambda group: len(group.stack()[group.stack().duplicated(keep=False)])).sum() == 0, "Duplicate documents!"

In [27]:
# TODO remove dataset from .gitignore after user study

# Random Selections

In [28]:
prediction_cache = {}
def prediction_cached(detector, document):
    id = (detector.__class__.__name__,document)
    if id not in prediction_cache:
        prediction_cache[id] = detector.predict_label([document])[0]
    return prediction_cache[id]


In [29]:
N_RANDOM_SELECTIONS = 10

In [30]:
detector_detectgpt = DetectorDetectGPT()
detector_radford = DetectorRadford()
detector_guo = DetectorGuo()
# returns a random selection that has the same shape and splits as the one obtained with the selection strategy
def get_random_df(df, seed=42):
    selection = []
    random.seed(seed)
    for idx, row in tqdm(list(df.iterrows())):
        detector = None
        explainer = None
        if row["Detector"] == "DetectorDetectGPT":
            detector = detector_detectgpt
        if row["Detector"] == "DetectorRadford":
            detector = detector_radford
        if row["Detector"] == "DetectorGuo":
            detector = detector_guo


        if row["Explainer"]  == "Anchor_Explainer":
            explainer = Anchor_Explainer(detector)
        if row["Explainer"]  == "LIME_Explainer":
            explainer = LIME_Explainer(detector)
        if row["Explainer"]  == "SHAP_Explainer":
            explainer = SHAP_Explainer(detector)
        #               all documents not in exclude_list                                                                            without replacement
        candidates = [i for i in range(0,len(documents)) if i not in exclude_list and (len(selection) == 0 or document_ids[i] not in list(zip(*selection))[8] + list(zip(*selection))[9])]
        random.shuffle(candidates)
        pairs = None
        while True:
            pairs = [(candidates[0], candidates[1])]
            if prediction_cached(detector, documents[candidates[0]]) == row["f(a)"] and prediction_cached(detector, documents[candidates[0]]) == prediction_cached(detector, documents[candidates[1]]):
                break
            candidates = candidates[2:]
        

        selection = update_selection(selection, pairs, explainer, detector)
    return pd.DataFrame(selection, columns=columns)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [31]:
dfs_random = [get_random_df(df, seed=i) for i in range(0,N_RANDOM_SELECTIONS)]


100%|██████████| 54/54 [03:26<00:00,  3.82s/it]
100%|██████████| 54/54 [02:32<00:00,  2.82s/it]
100%|██████████| 54/54 [01:29<00:00,  1.66s/it]
100%|██████████| 54/54 [01:27<00:00,  1.62s/it]
100%|██████████| 54/54 [01:24<00:00,  1.57s/it]
100%|██████████| 54/54 [01:20<00:00,  1.50s/it]
100%|██████████| 54/54 [01:20<00:00,  1.49s/it]
100%|██████████| 54/54 [01:21<00:00,  1.51s/it]
100%|██████████| 54/54 [01:18<00:00,  1.45s/it]
100%|██████████| 54/54 [01:17<00:00,  1.44s/it]


# Tables for LateX
Calculates document and explanation similarity metrics. Also compares them to random selections.

In [97]:
def get_metrics_FI(df, selecting_combinations_only=False):
    results = []
    for detector_class in [DetectorGuo, DetectorRadford, DetectorDetectGPT]:
        detector = detector_class(metadata_only=True)
        for explainer_class in [LIME_Explainer,SHAP_Explainer]:
            explainer = explainer_class(detector)
            for idx, row in tqdm(list(df.iterrows())):
            #    print(row)
                if selecting_combinations_only and row["Detector"] != detector.__class__.__name__:
                    continue
                a = row["Documents Phases 1+3"]
                b = row["Documents Phases 2+4"]
                W, _ = get_explanation_matrix_W([a,b], explainer, quiet=True)

                sim = cosine_similarity(W) 
                cosine_similarity_ = sim[0,1]

                n_tokens_overlap_in_w = np.all(W != 0, axis = 0).sum()# / np.any(W != 0, axis = 0).sum()
                

                results.append((
                    idx,
                    explainer.__class__.__name__,
                    detector.__class__.__name__,
                    cosine_similarity_,
                    n_tokens_overlap_in_w,

                 ))
    df_results = pd.DataFrame(results, columns=[
        "idx",
        "Explainer",
        "Set",
        "Cosine Similarity",
        "\\# Common Features",
        ])
    df_results = df_results.set_index(["Explainer", "Set"])
    return df_results

In [98]:
def get_metrics_Anchor(df, selecting_combinations_only=False):
    results = []
    for detector_class in [DetectorGuo, DetectorRadford, DetectorDetectGPT]:
        detector = detector_class(metadata_only=True)

        explainer = Anchor_Explainer(detector)
        for idx, row in tqdm(list(df.iterrows())):
        #    print(row)
            if selecting_combinations_only and row["Detector"] != detector.__class__.__name__:
                continue
            a = row["Documents Phases 1+3"]
            b = row["Documents Phases 2+4"]
            anchors, p, ids = get_anchors_at_each_k([a,b], explainer, quiet=True)
            # find anchors that occur more than once in the dataset, then remove duplicates (created by looping) with set()
            duplicate_anchors = [set(anchor) for anchor in set([frozenset(anchor) for anchor in anchors if anchors.count(anchor) > 1])]
            results.append((
                idx,
                explainer.__class__.__name__,
                detector.__class__.__name__,
                len(duplicate_anchors),
                max([len(anchor) for anchor in duplicate_anchors]) if len(duplicate_anchors) else 0,
                p[anchors.index(max(duplicate_anchors, key=lambda anchor: len(anchor)))] if len(duplicate_anchors) else 0
                ))
    df_results = pd.DataFrame(results, columns=[
        "idx",
        "Explainer",
        "Set",
        "\\# Matching Anchors",
        "Len Longest Matching Anchor",
        "$\\theta$ Longest Matching Anchor",
        ])
    df_results = df_results.set_index(["Explainer", "Set"])
    return df_results

In [99]:
def get_metrics_Document(df, selecting_combinations_only=False):
    results = []
    for detector_class in [DetectorGuo, DetectorRadford, DetectorDetectGPT]:
        detector = detector_class(metadata_only=True)
        for explainer_class in [LIME_Explainer,SHAP_Explainer, Anchor_Explainer]:
            explainer = explainer_class(detector)
            for idx, row in tqdm(list(df.iterrows())):
            #    print(row)
                if selecting_combinations_only and row["Detector"] != detector.__class__.__name__:
                    continue
                a = row["Documents Phases 1+3"]
                b = row["Documents Phases 2+4"]
            
                results.append((
                    idx,
                    explainer.__class__.__name__,
                    detector.__class__.__name__,
                        row["Spacy Similarity"],
                        row["Jaccard Similarity"],
                        row["Cosine Similarity tfidf"]))
    df_results = pd.DataFrame(results, columns=[
        "idx",
        "Explainer",
        "Set",
        "Spacy Similarity",
        "Jaccard Similarity",
        "Cosine Similarity tfidf",
        ])
    df_results = df_results.set_index(["Explainer", "Set"])
    return df_results

In [100]:
latex_strings = []

In [101]:
columns=["Metric", "tstatistic", "pvalue","$\mu$ Method", "$\mu$ Random".format(N_RANDOM_SELECTIONS), "Gain" ]

In [102]:
# get and aggregate results by detector
def get_results_detector_level(m_method, m_random):
    t = []
    for metric in m_method.columns:
        for (detector, group_method), ((detector_r), group_random) in zip(m_method.groupby(["Set"]), m_random.groupby(["Set"])):
            assert detector == detector_r
            tstatistic, pvalue = ttest_ind(group_method[metric], group_random[metric])

            t.append([detector[0], metric[0], tstatistic, pvalue, group_method[metric].mean(), group_random[metric].mean(), group_method[metric].mean() - group_random[metric].mean()])
    df_results_detector_level = pd.DataFrame(t, columns=["Set"]+columns).set_index(["Metric", "Set"])#.apply(get_p_asterisks_2samp).drop(["pvalue","tstatistic"], axis=1)
    return df_results_detector_level.reset_index().set_index(["Metric", "Set"])

In [103]:
# get results for entire selection
def get_results_entire_selection(m_method, m_random):
        t = []
        for metric in m_method.columns:
                tstatistic, pvalue = ttest_ind(m_method[metric], m_random[metric])
                t.append([metric[0],  tstatistic, pvalue, m_method[metric].mean(), m_random[metric].mean(), m_method[metric].mean() - m_random[metric].mean()])
        df_results_selection_level = pd.DataFrame(t, columns=columns)#.apply(get_p_asterisks_2samp).drop(["pvalue","tstatistic"], axis=1)
        # add additional descriptions
        df_results_selection_level["Set"] = "All"
        return df_results_selection_level.reset_index().set_index(["Metric", "Set"])

In [104]:
def highlight_significant(row, props=''):
  #  display(s)
    styles = [''] * len(row)
    styles[-1] = 'font-weight: bold' if row["pvalue"] <= 0.05 else ''
    return styles
def shade_by_type(row, props=''):

  if(row.name[0] == "Explanation Similarity in W"):
    return ['background-color:red'] * len(row)
  else:
    return [''] * len(row)
def shade_by_type_index(row, props=''):
  return ['background-color:red'] * 8 + [''] * 20


# Per detector

## Document Similarity

In [105]:
m_method = get_metrics_Document(df, selecting_combinations_only=True).groupby(["Set", "Explainer", "idx"]).agg(["mean"]) # note that nothing is aggregated here, this is just to match the shape of the next line:
m_random = get_metrics_Document(pd.concat(dfs_random), selecting_combinations_only=True).groupby(["Set", "Explainer", "idx"]).agg(["mean", "std"]) # take mean score across random runs for each metric

df_similarity_document = pd.concat([get_results_entire_selection(m_method, m_random),get_results_detector_level(m_method, m_random)]).sort_index(ascending=False).style.apply(highlight_significant, axis=1).hide(["tstatistic", "pvalue","index"], axis=1)\
    .map_index(lambda v: "rotatebox:{45}--rwrap;", level=0, axis=1).format(precision=2)  
display(df_similarity_document)
latex = df_similarity_document.to_latex(environment="longtable", convert_css=True, clines="all;data", hrules=True, caption="Similarity of documents (p < 0.05 bold)", label="similarityDocuments")
latex_strings.append(latex)

100%|██████████| 54/54 [00:00<00:00, 53875.46it/s]
100%|██████████| 54/54 [00:00<00:00, 35848.75it/s]
100%|██████████| 54/54 [00:00<00:00, 53569.63it/s]
100%|██████████| 54/54 [00:00<00:00, 54016.79it/s]
100%|██████████| 54/54 [00:00<00:00, 26976.23it/s]
100%|██████████| 54/54 [00:00<00:00, 53991.04it/s]
100%|██████████| 54/54 [00:00<00:00, 54068.37it/s]
100%|██████████| 54/54 [00:00<00:00, 54016.79it/s]
100%|██████████| 54/54 [00:00<00:00, 53824.24it/s]
100%|██████████| 540/540 [00:00<00:00, 90020.83it/s]
100%|██████████| 540/540 [00:00<00:00, 90002.95it/s]
100%|██████████| 540/540 [00:00<00:00, 82851.96it/s]
100%|██████████| 540/540 [00:00<00:00, 108059.36it/s]
100%|██████████| 540/540 [00:00<00:00, 77127.43it/s]
100%|██████████| 540/540 [00:00<00:00, 89967.20it/s]
100%|██████████| 540/540 [00:00<00:00, 90038.73it/s]
100%|██████████| 540/540 [00:00<00:00, 90053.05it/s]
100%|██████████| 540/540 [00:00<00:00, 108054.20it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,$\mu$ Method,$\mu$ Random,Gain
Metric,Set,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Spacy Similarity,DetectorRadford,0.92,0.88,0.05
Spacy Similarity,DetectorGuo,0.9,0.89,0.02
Spacy Similarity,DetectorDetectGPT,0.9,0.87,0.03
Spacy Similarity,All,0.91,0.88,0.03
Jaccard Similarity,DetectorRadford,0.16,0.12,0.04
Jaccard Similarity,DetectorGuo,0.14,0.12,0.02
Jaccard Similarity,DetectorDetectGPT,0.14,0.11,0.03
Jaccard Similarity,All,0.15,0.12,0.03
Cosine Similarity tfidf,DetectorRadford,0.12,0.08,0.04
Cosine Similarity tfidf,DetectorGuo,0.12,0.09,0.03


## Explanation Similarity

### FI Explainers

In [106]:
m_method = get_metrics_FI(df, selecting_combinations_only=True).groupby(["Set", "Explainer", "idx"]).agg(["mean"]) # note that nothing is aggregated here, this is just to match the shape of the next line:
m_random = get_metrics_FI(pd.concat(dfs_random), selecting_combinations_only=True).groupby(["Set", "Explainer", "idx"]).agg(["mean", "std"]) # take mean score across random runs for each metric

df_similarity_fi = pd.concat([get_results_entire_selection(m_method, m_random),get_results_detector_level(m_method, m_random)]).sort_index(ascending=False).style.apply(highlight_significant, axis=1).hide(["tstatistic", "pvalue","index"], axis=1)\
.map_index(lambda v: "rotatebox:{45}--rwrap;", level=0, axis=1).format(precision=2)
display(df_similarity_fi)
latex = df_similarity_fi.to_latex(environment="longtable", convert_css=True, clines="all;data", hrules=True, caption="Similarity of FI explanations. Cosine similarity in $W$ is significantly higher then when using random pairs (p < 0.05 bold)", label="similarityFI")
latex_strings.append(latex)


100%|██████████| 54/54 [00:00<00:00, 91.97it/s] 
100%|██████████| 54/54 [00:05<00:00,  9.78it/s] 
100%|██████████| 54/54 [00:00<00:00, 83.58it/s] 
100%|██████████| 54/54 [00:06<00:00,  8.48it/s]
100%|██████████| 54/54 [00:00<00:00, 99.34it/s]
100%|██████████| 54/54 [00:05<00:00, 10.04it/s]
100%|██████████| 540/540 [00:05<00:00, 103.37it/s]
100%|██████████| 540/540 [00:51<00:00, 10.55it/s]
100%|██████████| 540/540 [00:05<00:00, 100.67it/s]
100%|██████████| 540/540 [00:48<00:00, 11.05it/s]
100%|██████████| 540/540 [00:04<00:00, 110.30it/s]
100%|██████████| 540/540 [00:46<00:00, 11.68it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,$\mu$ Method,$\mu$ Random,Gain
Metric,Set,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
\# Common Features,DetectorRadford,11.89,8.67,3.22
\# Common Features,DetectorGuo,10.33,8.77,1.57
\# Common Features,DetectorDetectGPT,5.5,3.94,1.56
\# Common Features,All,9.24,7.12,2.12
Cosine Similarity,DetectorRadford,0.33,0.17,0.16
Cosine Similarity,DetectorGuo,0.38,0.22,0.16
Cosine Similarity,DetectorDetectGPT,0.24,0.11,0.14
Cosine Similarity,All,0.32,0.16,0.15


### Rule Based Explainer

In [107]:
m_method = get_metrics_Anchor(df, selecting_combinations_only=True).groupby(["Set", "Explainer", "idx"]).agg(["mean"]) # note that nothing is aggregated here, this is just to match the shape of the next line:
m_random = get_metrics_Anchor(pd.concat(dfs_random), selecting_combinations_only=True).groupby(["Set", "Explainer", "idx"]).agg(["mean", "std"]) # take mean score across random runs for each metric

df_similarity_anchors = pd.concat([get_results_entire_selection(m_method, m_random),get_results_detector_level(m_method, m_random)]).sort_index(ascending=False).style.apply(highlight_significant, axis=1).hide(["tstatistic", "pvalue","index"], axis=1)\
.map_index(lambda v: "rotatebox:{45}--rwrap;", level=0, axis=1).format(precision=2)
display(df_similarity_anchors)
latex = df_similarity_anchors.to_latex(environment="longtable", convert_css=True, clines="all;data", hrules=True, caption="Similarity of Anchor explanations (p < 0.05 bold)", label="similarityAnchors")
latex_strings.append(latex)

100%|██████████| 54/54 [00:00<00:00, 616.84it/s]
100%|██████████| 54/54 [00:00<00:00, 705.63it/s]
100%|██████████| 54/54 [00:00<00:00, 678.86it/s]
100%|██████████| 540/540 [00:00<00:00, 746.22it/s]
100%|██████████| 540/540 [00:00<00:00, 957.40it/s] 
100%|██████████| 540/540 [00:00<00:00, 1445.68it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,$\mu$ Method,$\mu$ Random,Gain
Metric,Set,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
\# Matching Anchors,DetectorRadford,0.33,0.09,0.24
\# Matching Anchors,DetectorGuo,0.89,0.42,0.47
\# Matching Anchors,DetectorDetectGPT,0.28,0.06,0.22
\# Matching Anchors,All,0.5,0.19,0.31
Len Longest Matching Anchor,DetectorRadford,0.33,0.09,0.24
Len Longest Matching Anchor,DetectorGuo,0.89,0.42,0.47
Len Longest Matching Anchor,DetectorDetectGPT,0.28,0.06,0.22
Len Longest Matching Anchor,All,0.5,0.19,0.31
$\theta$ Longest Matching Anchor,DetectorRadford,0.16,0.09,0.07
$\theta$ Longest Matching Anchor,DetectorGuo,0.52,0.34,0.18


### Export

In [108]:
for s in latex_strings:
    print(s)

\begin{longtable}{llrrr}
\caption{Similarity of documents (p < 0.05 bold)} \label{similarityDocuments} \\
\toprule
 &  & \rotatebox{45}{$\mu$ Method} & \rotatebox{45}{$\mu$ Random} & \rotatebox{45}{Gain} \\
Metric & Set &  &  &  \\
\midrule
\endfirsthead
\caption[]{Similarity of documents (p < 0.05 bold)} \\
\toprule
 &  & \rotatebox{45}{$\mu$ Method} & \rotatebox{45}{$\mu$ Random} & \rotatebox{45}{Gain} \\
Metric & Set &  &  &  \\
\midrule
\endhead
\midrule
\multicolumn{5}{r}{Continued on next page} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
\multirow[c]{4}{*}{Spacy Similarity} & DetectorRadford & 0.92 & 0.88 & \bfseries 0.05 \\
\cline{2-5}
 & DetectorGuo & 0.90 & 0.89 & 0.02 \\
\cline{2-5}
 & DetectorDetectGPT & 0.90 & 0.87 & \bfseries 0.03 \\
\cline{2-5}
 & All & 0.91 & 0.88 & \bfseries 0.03 \\
\cline{1-5} \cline{2-5}
\multirow[c]{4}{*}{Jaccard Similarity} & DetectorRadford & 0.16 & 0.12 & \bfseries 0.04 \\
\cline{2-5}
 & DetectorGuo & 0.14 & 0.12 & \bfseries 0.02 \\
\cline{2-5}


In [109]:
columns = [c.replace("_Explainer", "") for c in df.groupby(["Detector","Explainer"]).count().index.get_level_values(1).unique()]
index = [i.replace("Detector","") for i in df.groupby(["Detector","Explainer"]).count().index.get_level_values(0).unique()]
r = []
u = 1
for detector_name in index:
    row = []
    for explainer_name in columns:
        users = []
        for i in range(0,3):
            users.append("U"+str(u))
            u+=1
        row.append(users)
        
    r.append(row)
print(pd.DataFrame(r, columns = columns, index = index ).to_latex(caption="Assignment",label="assignmentusers"))


\begin{table}
\caption{Assignment}
\label{assignmentusers}
\begin{tabular}{llll}
\toprule
 & Anchor & LIME & SHAP \\
\midrule
DetectGPT & ['U1', 'U2', 'U3'] & ['U4', 'U5', 'U6'] & ['U7', 'U8', 'U9'] \\
Guo & ['U10', 'U11', 'U12'] & ['U13', 'U14', 'U15'] & ['U16', 'U17', 'U18'] \\
Radford & ['U19', 'U20', 'U21'] & ['U22', 'U23', 'U24'] & ['U25', 'U26', 'U27'] \\
\bottomrule
\end{tabular}
\end{table}



In [110]:
columns = [c.replace("_Explainer", "") for c in df.groupby(["Detector","Explainer"]).count().index.get_level_values(1).unique()]
index = [i.replace("Detector","") for i in df.groupby(["Detector","Explainer"]).count().index.get_level_values(0).unique()]
r = []
u = 1
for i, detector_name in enumerate(index):
    row = []
    for explainer_name in columns:
        sets = []
        # for _ in range(0,3):

        row.append("Set{}".format(i))
        
    r.append(row)
print(pd.DataFrame(r, columns = columns, index = index ).to_latex(caption="Sets",label="assignmentsets"))


\begin{table}
\caption{Sets}
\label{assignmentsets}
\begin{tabular}{llll}
\toprule
 & Anchor & LIME & SHAP \\
\midrule
DetectGPT & Set0 & Set0 & Set0 \\
Guo & Set1 & Set1 & Set1 \\
Radford & Set2 & Set2 & Set2 \\
\bottomrule
\end{tabular}
\end{table}

