> **Warning**: Always set this flag to `True` before git commit!

In [1]:
OBFUSCATE_RESULTS = False

In [2]:
import torch

In [3]:
from gpt2outputdataset.detector_radford import DetectorRadford
from detectgpt.detector_detectgpt import DetectorDetectGPT
from detector_guo import DetectorGuo
from detector_dummy import DetectorDummy
from explainer_wrappers import LIME_Explainer, SHAP_Explainer, Anchor_Explainer

In [4]:
import pandas as pd
import os
import numpy as np
from tqdm import tqdm
from IPython.core.display import HTML
import lime
import numpy as np
import warnings
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from sklearn.preprocessing import normalize

# Utility functions

In [5]:
template = """
<p><b>This is a {kind_of_document} document.</b></p>
<p>The detector {correctly_or_wrongly} predicted that this document was... </p>
<p>&emsp; ... machine generated with {p_machine} % confidence.</p>
<p>&emsp; ... human written with {p_human} % confidence.</p> 
<div style="float:left;">{highlighted_text}</div>
"""
#<div style="float:left; height:30em;">{barplot_machine}{barplot_human}</div>



#"""
def print_template(document, gold_label, detector, explainer):
    p_machine, p_human = detector.predict_proba([document])[0]
   # machine, human = explainer.get_barplots_HTML(document)
    display(HTML(template.format(
    p_machine=int(p_machine*100) if not OBFUSCATE_RESULTS else "<redacted>", 
    p_human=int(p_human*100) if not OBFUSCATE_RESULTS else "<redacted>",
  #  barplot_machine=machine,
  #  barplot_human=human,
    kind_of_document= (("machine generated" if gold_label == False else "human written") if not OBFUSCATE_RESULTS else "<redacted>"), 
    correctly_or_wrongly= (("correctly" if detector.predict_label([document])[0] == gold_label else "wrongly") if not OBFUSCATE_RESULTS else "<redacted>"), 
    highlighted_text=explainer.get_highlighted_text_HTML((document if not OBFUSCATE_RESULTS else "<redacted> <redacted> <redacted> <redacted> <redacted> <redacted> <redacted> <redacted> <redacted>")),
    )))
def print_shared_features(features, fi_scores):
    for feature, (fi_score_a, fi_score_b) in zip(features, fi_scores):
        print(feature)
        print("\t\ta: {} \t b: {}".format(fi_score_a, fi_score_b))
        print("-----------")


In [6]:
def print_pairs(pairs, documents, gold_labels, document_ids, detector, explainer, skip_n=0):

    for (a,b)in pairs:
        if OBFUSCATE_RESULTS:
            print("idx_a: <redacted> idx_b <redacted>")
        else:
            print("idx_a: {} idx_b {}".format(document_ids[a],document_ids[b]))

        print_template(documents[a], gold_labels[a], detector, explainer)
        print_template(documents[b], gold_labels[b], detector, explainer)
        print("------------------------------------------------------------------------------------------")
        print("------------------------------------------------------------------------------------------")
        print("------------------------------------------------------------------------------------------")
  


# Search Strategy for Feature Importance Explainers

In [7]:
# returns a matrix of explanations for all documents in "data"
# This function was once SubmodularPick.__init__() in LIME. It was planned to use its output for a search strategy for similar explanations. 
# Only the code for creating W from the paper (rows are explanations, cols are BOW features) remains
def get_explanation_matrix_W(data, explainer, quiet=False):
    # Get (cached) explanations 
    explanations_and_documents = [(d, explainer.get_fi_scores(d, fill=True)[0]) for d in tqdm(data, desc="Loading all explanations",disable=quiet) ] # [0]: only irt to label machine, fill: return all words, even those with 0 fi

    get_feature_name_signed = lambda feature,value : feature + ("_+" if value >=0 else "_-") # appends "_+" or "_-" to each feature name, e.g. "example" -> "example_+" if fi(example) > 0
    # Ribeiro et al.: Find all the explanation model features used. Defines the dimension d'
    # i.e. determine columns of W: each word (BOW) gets (up to) two columns, one for positive FI scores, one for negative FI scores
    features_dict = {}
    feature_iter = 0
    for d, exp in tqdm(explanations_and_documents, desc="Building global dict of features", disable=quiet):
     #   print("exp",exp)
        for feature_idx, value in exp: # irt to label machine
            feature = explainer.tokenize(d)[feature_idx]
            feature_name = get_feature_name_signed(feature,value) # get_feature_name_signed: see above
            if feature_name not in features_dict.keys():
                features_dict[feature_name] = (feature_iter)
                feature_iter += 1
    d_prime = len(features_dict.keys())

    # Ribeiro et al.: Create the n x d' dimensional 'explanation matrix', W
    W = np.zeros((len(explanations_and_documents), d_prime))

    # fill W, look up cols in dict that was just created
    # W: one row per explanation, one col per feature in feature_dict
    for i, (d, exp) in enumerate(tqdm(explanations_and_documents,  desc="Building W",disable=quiet)):
        for feature_idx, value in exp: # irt to label machine
            # get_feature_name_signed: see above
            feature = explainer.tokenize(d)[feature_idx]
            W[i, features_dict[get_feature_name_signed(feature,value)]] += value
    return W, features_dict


In [8]:
# returns a list of tuples: (pair of documents whose explanations are similar, the features that overlap, fi scores of said features)
# this maximizes similarity between documents (greedy, each document is only part of one tuple)
# another function should select n tuples to maximize coverage in explanation space akin to SP-LIME
sum_two_max = None
def get_pairs(documents, W, detector, features_dict, n_pairs=None):
    if n_pairs is None:
        n_pairs = len(documents)//2
    idx_pairs = [] # tuples of indices of similar documents a,b in "data"
    features = [] # list of features those documents covered
    fi_scores_pairs = []

    W_ = np.copy(W)

    document_indices = np.arange(0, W_.shape[0])
   # print(document_indices.shape, W_.shape)
    for _ in tqdm(range(0,n_pairs), desc="Obtaining pairs"):
        sim = cosine_similarity(W_) # calculate cosine similarity between all explanations
        sim = np.triu(sim,k=1)  # remove redundant information for argmax()

        idx_max = np.unravel_index(sim.argmax(), sim.shape) # get most similar pair, result is (idx_a, idx_b)
       # print(idx_max)
        features_non_zero_in_both = np.intersect1d(W_[idx_max[0]].nonzero(),W_[idx_max[1]].nonzero()) # get features that have non-zero fi in both explanations
                                                                                                      # is used later for selecting a set of tuples with high coverage (as in SP-LIME)
        non_zero_features = [] # list with features that will be returned
        non_zero_fi_scores_tuples = [] # list of tuples with fi scores in a and b that will be returned
    
        # look up feature_idxs in features_dict and append them to the output
        for iii in features_non_zero_in_both:
           key = list(features_dict.keys())[list(features_dict.values()).index(iii)]
           non_zero_features.append(key)
           non_zero_fi_scores_tuples.append((W_[idx_max[0],features_dict[key]], W_[idx_max[1],features_dict[key]]))
        
        # Only add pair to output list if valid: at least one common feature is not zero AND f(a) == f(b) (i.e., the explanation is arguing for the same detector verdict)
        if len(non_zero_features) > 0:
            a,b = detector.predict_label([documents[document_indices[idx_max[0]]], documents[document_indices[idx_max[1]]]])
            if a == b:
                idx_pairs.append(document_indices[list(idx_max)])
                fi_scores_pairs.append(non_zero_fi_scores_tuples)
                features.append(non_zero_features)
        # delete pair from W_:
        W_ = np.delete(W_, idx_max, axis=0) 
        document_indices = np.delete(document_indices, list(idx_max))

    return idx_pairs, features, fi_scores_pairs


In [9]:
# want a set of pairs that maximizes coverage in explanation space akin to the SP-LIME strategy but for pairs 
# this is the maximum coverage problem: e.g., R. Church and C. ReVelle, 1974 http://yalma.fime.uanl.mx/~roger/work/teaching/class_tso/docs_project/problems/MCLP/1974-prs-Church%20ReVelle-maximal%20covering%20location.pdf
# implementing a greedy algorithm here: 
#   "In order to achieve a maximal cover for p facilities under a given service distance, 
#   the algorithm starts with an empty solution set and then adds to this set one at a 
#   time the best facility sites. The GA algorithm picks for the first facility that 
#   site which covers the most of the total population. For the second facility, GA 
#   picks the site that covers the most of the population not covered by the first 
#   facility. Then, for the third facility, GA picks the site that covers the most of the 
#   population not covered by the first and second facilities. This process is continued until either p facilities have been selected or all the population is covered. 
#   Details of the algorithm are given in Church." (R. Church and C. ReVelle, 1974, p. 105f)


# let the coverage be the number of columns != 0 in W. And coverage((a,b))) := coverage(sum([a,b])), akin to the importance vector in SP-LIME (Note that columns in W are either negative FI or positive FI here (see get_feature_name_signed()), so scores don't cancel out in sum)

def coverage(selection, W):
    return np.count_nonzero(np.sum(W[np.array(list(selection)).flatten()], axis=0)) # coverage(selection)= number of cols in W that have at least one non-zero entry under this selection of pairs. Recall that W has (up to) two entries per word: one for positive and one for negative FI

def get_site_with_max_coverage(sites, previous_selections, W):
    best_site = None
    best_coverage = 0
    for site in sites:
        candidate = set(np.array(previous_selections).flatten()).union(site) # extend the previous selection by "site", this addresses "For the second facility, GA picks the site that covers the most of the population not covered by the first "
        cov = coverage(candidate, W) # compute new coverage
        if cov >= best_coverage:
            best_coverage = cov
            best_site = site
    return best_site, best_coverage

def get_p_tuples_with_high_coverage(indices, W, p=10):
  sites = list(indices)
  # "the algorithm start with emty solution set"
  result = list()
  # "and then adds to this set one at a time the best facility sites"
  while True:
      # "The GA algorithm picks for the first facility that 
      # site which covers the most of the total population"
      best_site, best_coverage = get_site_with_max_coverage(sites, result, W)
      result.append(best_site)
      # "This process is continued until either p facilities have been selected or all the population is covered."
      if len(result) == p or best_coverage == W.shape[1]:
          break
  return result


In [10]:
# returns two pairs, one for f(x) = machine and one for f(x) = human
# checks texts_already_selected and chooses next best pair (for each class) if a document is in texts_already_selected (i.e. it was already selected for another explainer-detector pair)
def obtain_dataset_FI_methods(explainer, detector, documents, gold_labels, document_ids, texts_already_selected):
    W, features_dict = get_explanation_matrix_W(documents, explainer)
    indices, _, _ = get_pairs(documents, W, detector, features_dict)
    # want a dataset that is balanced irt to the two base classes:
    # increase number of pairs returned by greedy algorithm until the p tuples include examples for both classes:
    k = 4

    pair_human = None
    pair_machine = None

    predictions= None
    while True:
        # obtain k pairs with high coverage
        pairs = get_p_tuples_with_high_coverage(indices, W, p=k)

        # get f(a) as one example per class is returned
        predictions = [detector.predict_label([documents[a]])[0] for a,_ in pairs] # wheter a == b was tested before

        # return example with highest coverage for each class
        # if a document is in texts_already_selected (i.e. it was already selected for another explainer-detector pair), the one with the next-highest coverage (for that prediction) is returned 
        for idx_pair, prediction in enumerate(predictions):
            a,b = pairs[idx_pair]
            # check if a or b are in texts_already_selected
            if (documents[a] not in texts_already_selected) and (documents[b] not in texts_already_selected):
                if prediction == 0 and pair_machine is None: # only keep first
                    pair_machine = pairs[idx_pair] 
                if prediction == 1 and pair_human is None: # only keep first
                    pair_human = pairs[idx_pair] 
            if pair_human is not None and pair_machine is not None:
                return [pair_machine, pair_human] 
        k+=1 # loop until both pair_machine and pair_human not None



# Search Strategy for Rule-Based Explainers

In [11]:
from anchor.anchor import anchor_explanation
from collections import defaultdict
from itertools import combinations, chain

In [12]:
def jaccard_similarity(document_a, document_b):
    # case sensitive, on spacy tokens
    a = list(chain(*[[token.text for token in sent] for sent in nlp(document_a).sents]))
    b = list(chain(*[[token.text for token in sent] for sent in nlp(document_b).sents]))
    intersection = float(len(list(set(a).intersection(b))))
    union = float((len(set(a)) + len(set(b)))) - intersection
    return intersection / union

In [13]:
# encodes the order of occurence in a list of words, e.g.:
# ["example", "test", "example", "one"] -> ['example_0', 'test_0', 'example_1', 'one_0']
def encode_count(list_of_words):
    d = defaultdict(lambda : 0)
    encoded = []
    for word in list_of_words:
        encoded.append(word + "_" + str(d[word]))
        d[word] +=1
    return encoded

In [14]:
# the dictonary Anchors returns can define multiple anchors:
# {this, is, an, example} : 0.9
# {this, is, an}: 0.8
# {this, is, }: 0.75
# {this}: 0.4
# extract all of them, only keep those with p >= 0.75 (threshold set when searching)
def get_anchors_at_each_k(documents, explainer):
    anchors = []
    p = []
    ids = []
    for i,_ in tqdm(enumerate(documents), desc="Loading all explanations"):#enumerate(documents):
        exp = explainer.get_explanation_cached(documents[i])
        exp["names"] = encode_count(exp["names"]) # Anchors is not BOW. But the algorithm is written with python set()s
        while len(exp["mean"]) >=1:#and exp["mean"][-1] >= 0.75:
            anchors.append(set(exp["names"])) 
            p.append(exp["mean"][-1])
            ids.append(i)

            exp["mean"].pop()
            exp["names"].pop()
    return anchors, p, ids


In [15]:
# searches for pairs of anchors
# returns 2 pairs of documents, one pair for f(x) = machine, one for f(x) = human, both sampled randomly
# checks for and skips documents in "texts_already_selected" (i.e. it was already selected for an other explainer-detector pair)

def obtain_dataset_Anchor(explainer, detector, documents, gold_labels, document_ids, texts_already_selected):

    anchors, p, ids = get_anchors_at_each_k(documents, explainer)
                        # DetectGPT + Anchors is to expensive to run experiments on 
    # find anchors that occur more than once in the dataset, then remove duplicates (created by looping) with set()
    duplicate_anchors = [set(anchor) for anchor in set([frozenset(anchor) for anchor in anchors if anchors.count(anchor) > 1])]
    # get the ids and p for each duplicate_anchor in  duplicate_anchors
    # "candidates" is a list of lists with ids (and all other details) of each duplicate_anchor
    candidates = [[(anchor, p, document_id) for anchor, p, document_id in zip(anchors, p, ids) if anchor == duplicate_anchor] for duplicate_anchor in duplicate_anchors ]
    # now check for each paring of the documents in each sublist of "candidates":
    #   is f(a) == f(b)?, if not: discard
    # then pick pair with highest jaccard_score on the original documents in each "candidate"
    pairs = []

    predictions_cache = {}
    def cached_predict(idx):
        if idx not in predictions_cache:
            predictions_cache[idx] = detector.predict_label([documents[idx]])[0]
        return predictions_cache[idx]
    for candidate in tqdm(candidates, desc="Assessing candidates",position=1):
        anchor_s, p, ids  = zip(*candidate)
        c = list(combinations(ids, 2))
        c = [(a,b) for a,b in c if cached_predict(a) == cached_predict(b)]
        if len(c) == 0:
            continue
        jaccard_scores = [(a,b, jaccard_similarity(documents[a], documents[b])) for a,b in tqdm(c, desc="Calculating Jaccard Similarity (of documents not Anchors)",position=0)]
        a,b, score = max(jaccard_scores, key=lambda x: x[2])
        pairs.append((a,b))

    # sample twice: once for f(x) == human and once for f(x) == machine. f(a) == f(b) is tested earlier

    predictions = [cached_predict(a) for a,_ in pairs] # wheter a == b was tested before

    predictions_ = np.array(predictions)
    pairs_ = np.array(pairs)

    machine = pairs_[predictions_ == False]
    human = pairs_[predictions_ == True]
    
    np.random.seed(42)
    result = []
    # select 2 pairs from pairs_: one for each class
    # need to check if a document from the pair is in texts_already_selected
    while True:       
        # one explainer (DetectGPT) has no explanations for f(x) = human:
        if not(True in predictions):
            print("Warning: No examples for f(x) = human. Returning an additional example for machine")
            result =  machine[np.random.choice(machine.shape[0], 2, replace=False)]
        elif not (False in predictions):
            print("Warning: No examples for f(x) = machine. Returning an additional example for human")
            result = human[np.random.choice(human.shape[0], 2, replace=False)]
        else:
            result =  [machine[np.random.randint(0, machine.shape[0]),:], human[np.random.randint(0, human.shape[0]),:]] # returns a random pair for machine and a random pair for human

        
        # check for duplicates in texts_already_selected, re-sample if the pairs are duplicates.
        if all([(documents[a] not in texts_already_selected) and (documents[b] not in texts_already_selected) for a,b in result]):
            return result
        else:
            print("Loop: Avoiding duplicates")

    


# Load Dataset

In [16]:
def obtain_dataset(explainer, detector, documents, gold_labels, document_ids, texts_already_selected):
    if isinstance(explainer, Anchor_Explainer):
        return obtain_dataset_Anchor(explainer, detector, documents, gold_labels, document_ids, texts_already_selected)
    else:
        return obtain_dataset_FI_methods(explainer, detector, documents, gold_labels, document_ids, texts_already_selected)

In [17]:
test = pd.read_pickle("./dataset_test.pkl")
test = test 

documents = list(test["answer"])
gold_labels = list(test["author"] == "human_answers") # convention: 0: machine, 1: human, see detector.py
document_ids = list(range(0,len(documents))) # note that the search algorithms don't use these ids. They are only used for printing and the exclude_list!!

In [18]:
import spacy
nlp = spacy.load("en_core_web_lg")
from sklearn.feature_extraction.text import TfidfVectorizer


tfidf = TfidfVectorizer().fit(documents)

# Perform Document Selection
Some documents are excluded from the user-study for the reasons specified below:

In [19]:

exclude_list = {
    (195, 60,108, 228): "Names forum/service explicitly",
    (288,117, 188, 110): "Author introduces themselves by name",
    (16,): "References earlier post by other user",
    (190,294): "Names forum user who asked the question",
    (27,): "NSFW",
    
    
}
exclude_list = [x for xs in [ list(key) for key in exclude_list.keys()] for x in xs]
exclude_list

[195, 60, 108, 228, 288, 117, 188, 110, 16, 190, 294, 27]

In [20]:
# apply exclude_list
documents = [d for i,d in zip(document_ids, documents) if i not in exclude_list]
gold_labels = [gl for i,gl in zip(document_ids, gold_labels) if i not in exclude_list]
document_ids = [i for i in document_ids if i not in exclude_list]

> **Warning**: If you plan to participate in the user study, set `OBFUSCATE_RESULTS` to `True` before proceeding!!!

In [21]:
columns = ["Detector", "Explainer", "Documents Phases 1+3", "Documents Phases 2+4", "f(a)", "f(b)", "GT a", "GT b", "idx a", "idx b", "Spacy Semantic Similarity: Cosine Similarity Average of Word Vectors (a,b)", "Jaccard Similarity (a,b)", "Cosine Similarity tfidf Vectors","hash a", "hash b"]




In [22]:
def update_selection(pairs, explainer, detector):
    for a,b in pairs:
        if OBFUSCATE_RESULTS:
            continue
        
        tfidf_= tfidf.transform([documents[a], documents[b]])   
        selection.append((detector.__class__.__name__,
                        explainer.__class__.__name__,
                        documents[a], documents[b],
                        *detector.predict_label([documents[a], documents[b]]),
                        gold_labels[a],
                        gold_labels[b],
                        document_ids[a],
                        document_ids[b],
                        nlp(documents[a]).similarity(nlp(documents[b])),
                        jaccard_similarity(documents[a], documents[b]),
                        (tfidf_ * tfidf_.T).toarray()[0,1],
                        explainer.get_hash(documents[a]),
                        explainer.get_hash(documents[b])))

In [23]:
selection = []
for detector_class in [DetectorDetectGPT,DetectorGuo, DetectorRadford]:
    detector = detector_class()
    display(HTML("<h1>{}</h1>".format(detector.__class__.__name__)))
    for explainer_class in [Anchor_Explainer, LIME_Explainer,SHAP_Explainer]:
        explainer = explainer_class(detector)
        display(HTML("<h2>{}</h2>".format(explainer.__class__.__name__)))
        
        texts_already_selected = []
        if len(selection) > 0:
            texts_already_selected = list(zip(*selection))[2] + list(zip(*selection))[3]
        pairs = obtain_dataset(explainer, detector, documents, gold_labels, document_ids, texts_already_selected=texts_already_selected)
        update_selection(pairs, explainer, detector)
        
    

Using cache dir ./.cache
Loading BASE model EleutherAI/pythia-70m...


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


MOVING BASE MODEL TO GPU...DONE (0.19s)
DONE (0.08s)


Loading all explanations: 293it [00:00, 607.06it/s]

Assessing candidates:   0%|          | 0/27 [00:00<?, ?it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 15.74it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 10/10 [00:00<00:00, 16.42it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 6/6 [00:00<00:00, 19.20it/s]

Assessing candidates:  15%|█▍        | 4/27 [00:14<01:31,  3.99s/it]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 25.96it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 17.68it/s]

Assessing candidates:  26%|██▌       | 7/27 [00:19<00:48,  2.44s/it]
Assessing candidates:  30%|██▉       | 8/27 [00:21<00:43,  2.26s/it]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 10/10 [00:00<00:00, 19.98it/s]

Calculating Jaccard Similarity (of docu



Loading all explanations: 100%|██████████| 293/293 [00:00<00:00, 952.63it/s]
Building global dict of features: 100%|██████████| 293/293 [00:01<00:00, 151.29it/s]
Building W: 100%|██████████| 293/293 [00:01<00:00, 156.00it/s]
Obtaining pairs: 100%|██████████| 146/146 [04:37<00:00,  1.90s/it]


Loading all explanations: 100%|██████████| 293/293 [00:00<00:00, 3219.57it/s]
Building global dict of features: 100%|██████████| 293/293 [00:20<00:00, 14.17it/s]
Building W: 100%|██████████| 293/293 [00:21<00:00, 13.51it/s]
Obtaining pairs: 100%|██████████| 146/146 [04:32<00:00,  1.87s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading all explanations: 293it [00:03, 81.49it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 3/3 [00:00<00:00, 16.12it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 15.99it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 18.86it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 3/3 [00:00<00:00, 18.92it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 17.85it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 19.04it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 10/10 [00:00<00:00, 20.56it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 3/3 [00:00<00:00, 19.67it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 23.8

Loop: Avoiding duplicates


Loading all explanations: 100%|██████████| 293/293 [00:02<00:00, 124.96it/s]
Building global dict of features: 100%|██████████| 293/293 [00:01<00:00, 157.95it/s]
Building W: 100%|██████████| 293/293 [00:01<00:00, 155.63it/s]
Obtaining pairs: 100%|██████████| 146/146 [00:07<00:00, 18.50it/s]


Loading all explanations: 100%|██████████| 293/293 [00:02<00:00, 137.23it/s]
Building global dict of features: 100%|██████████| 293/293 [00:19<00:00, 14.65it/s]
Building W: 100%|██████████| 293/293 [00:20<00:00, 14.02it/s]
Obtaining pairs: 100%|██████████| 146/146 [00:09<00:00, 15.74it/s]


Loading all explanations: 293it [00:02, 131.01it/s]

Assessing candidates:   0%|          | 0/26 [00:00<?, ?it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 6/6 [00:00<00:00, 21.62it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 21.28it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 7/7 [00:00<00:00, 21.37it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 27.39it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 2/2 [00:00<00:00, 20.51it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 22.97it/s]
Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 1/1 [00:00<00:00, 21.05it/s]

Calculating Jaccard Similarity (of documents not Anchors): 100%|██████████| 3/3 [00:00<00:00, 19.86it/s]

Calculating Jaccard Similarity (of docum

Loading all explanations: 100%|██████████| 293/293 [00:02<00:00, 127.58it/s]
Building global dict of features: 100%|██████████| 293/293 [00:01<00:00, 156.84it/s]
Building W: 100%|██████████| 293/293 [00:01<00:00, 150.55it/s]
Obtaining pairs: 100%|██████████| 146/146 [00:08<00:00, 17.75it/s]


Loading all explanations: 100%|██████████| 293/293 [00:00<00:00, 875.54it/s]
Building global dict of features: 100%|██████████| 293/293 [00:19<00:00, 14.89it/s]
Building W: 100%|██████████| 293/293 [00:19<00:00, 14.87it/s]
Obtaining pairs: 100%|██████████| 146/146 [00:08<00:00, 16.46it/s]


In [24]:
df = pd.DataFrame(selection, columns=columns)
if not OBFUSCATE_RESULTS:
    display(df)
else:
    display(pd.DataFrame([], columns=columns))

Unnamed: 0,Detector,Explainer,Documents Phases 1+3,Documents Phases 2+4,f(a),f(b),GT a,GT b,idx a,idx b,"Spacy Semantic Similarity: Cosine Similarity Average of Word Vectors (a,b)","Jaccard Similarity (a,b)",Cosine Similarity tfidf Vectors,hash a,hash b
0,DetectorDetectGPT,Anchor_Explainer,Both are saying essentially the same thing. T...,"Assuming you live in the US, it is quite norma...",0,0,True,True,52,238,0.867257,0.176101,0.15379,714b04dd8923e09ea3f370b93660441d792104140d13d3...,60b992dfcad293c2fbe76d7842a4e469ba041ced7c5270...
1,DetectorDetectGPT,Anchor_Explainer,It can be a good idea to follow the advice of ...,It is generally a good idea to use sponsorship...,0,0,False,False,217,234,0.95504,0.164062,0.203377,c4c411239613f735fe43246c9c0108a876ff3ebcf723ee...,e573296bc2021073ef94c1f60c1f097dc770793203b348...
2,DetectorDetectGPT,LIME_Explainer,"Hi,1. The anti TB drug doses have to be prescr...",Credit unions are not-for-profit financial coo...,0,0,True,False,159,161,0.910814,0.117188,0.03904,6691320ac50e6e039d039bc509148cc20d924199ff32ba...,51d1e47a3a3e56608b1fbc9a3f63dfd38a5d07ecd93837...
3,DetectorDetectGPT,LIME_Explainer,Predictive analytics encompasses a variety of ...,You really shouldn't be using class tracking t...,1,1,True,True,6,142,0.801584,0.063694,0.045019,42b84c175e792cbace8c18218652b0e5fc172d0722913d...,604e277aa0ddee14c63bd188af56065e65d36880c5698d...
4,DetectorDetectGPT,SHAP_Explainer,The conversion ratio between General Motors (G...,It is not uncommon for pain to increase after ...,0,0,False,False,166,255,0.94622,0.14966,0.085576,c153dc1fe06bde184f21214073db83e11ce6f526e90551...,60707fbb129a62fd3e3a762cb9a9c8d57a8781c4d65dda...
5,DetectorDetectGPT,SHAP_Explainer,If your counterparty sent money to a correspon...,"As your is a very specific case, please get an...",1,1,True,True,22,82,0.949592,0.142857,0.153888,3dd1514de75255da157c91f80bf9745485cc700f81cf40...,6b25114906789fc86fa719f0394277f9289f080cbbbc7e...
6,DetectorGuo,Anchor_Explainer,If your deductions are higher than your income...,Hallucinations can be caused by a variety of f...,0,0,False,False,77,205,0.936843,0.191781,0.104886,4a28a8f0d8c6fbee4a7aad31697d315f2e478277646094...,f7a467afa47b3708b8ffd00f7dfecb7c4b359dc5a9c59a...
7,DetectorGuo,Anchor_Explainer,"Hello, Thanks for your query.This can occur du...","thanks for your query, the bump could be secon...",1,1,True,True,163,175,0.962259,0.236559,0.255573,797c3e2456febfd1031497754c2da79b9b0cb00f770118...,b2ae8a1624e51a0894124a9aaae1fb99c3557b284a96c7...
8,DetectorGuo,LIME_Explainer,Treatment for a spiral fracture of the ring fi...,"In the United States, you will generally have ...",0,0,False,False,252,274,0.924423,0.156863,0.10656,3838aecf67942145b93b96e42021f369c969c3ec9b719c...,2b0771ebe6b8d46c3ce99d97700281fe561da6c7a153fc...
9,DetectorGuo,LIME_Explainer,Reuters has a service you can subscribe to tha...,On what basis did you do your initial allocati...,1,1,True,True,249,289,0.932199,0.171233,0.181787,e6b72b7969bbad53385251e1baf30d9f72a75067fc1707...,333765b183235b4a0645cd283edc9089a44efa12118002...


In [25]:
for idx, row in df.iterrows():
    print(row["GT a"])
    print(row["idx a"])
    print(row["Documents Phases 1+3"])
    print("---------------")

True
52
Both are saying essentially the same thing.  The Forbes articles says "as much as 20% [...] up to a maximum of $50,000".  This means the same as what the IRS page when it says the lesser of a percentage of your income or a total of $53,000.  In other words, the $53k is a cap: you can contribute a percentage of your earnings, but you can never contribute more than $53k, even if you make so much money that 20% of your earnings would be more than that. (The difference between 20% and 25% in the two sources appears to reflect a difference in contribution limits depending on whether you are making contributions for employees, or for yourself as a self-employed individual; see Publication 560.  The difference between $50k and $53k is due to the two pages being written in different years; the limits increase each year.)
---------------
False
217
It can be a good idea to follow the advice of a guru if they have a track record of making successful investments and have a solid understand

In [26]:
for idx, row in df.iterrows():
    print(row["GT b"])
    print(row["idx b"])
    print(row["Documents Phases 2+4"])
    print("---------------")

True
238
Assuming you live in the US, it is quite normal when you are applying for a loan that the application will ask you to confirm your identity. One of these methods is to ask you which of the following addresses you have lived at, with some of them being very similar (i.e. same city, or maybe even the same street). Sometimes they will ask questions and your answer would be "None of the above." This is done to prevent fraudsters from applying for a loan under your identity. If you see no signs of unauthorized accounts or activities on your credit reports, and you initiated the car loan application, then you should be fine.
---------------
False
234
It is generally a good idea to use sponsorship money to pay off any debts, including student debt. This can help you to reduce the amount of money you owe and avoid accruing additional interest on your loans. However, it is important to carefully consider your financial situation and make sure that you will have enough money to cover yo

# Some Checks

In [29]:
assert not any(df[["Documents Phases 1+3", "Documents Phases 2+4"]].stack().reset_index(drop="True").duplicated()), "Duplicate documents!"

In [30]:
assert all(df.groupby(["Detector", "Explainer"]).count()["Documents Phases 1+3"] == 2)

# Save Selection

In [37]:
if not OBFUSCATE_RESULTS:
    df.to_pickle("./dataset_user_study_mock.pkl") # file in .gitignore

In [None]:
# TODO remove from .gitignore after user study
# TODO change format to something else after user study 

In [None]:
import random

In [None]:
df

# Random Pairs

In [None]:
np.random.seed(42)
detector = DetectorRadford()
explainer = SHAP_Explainer(detector)
selection =[]

candidates = list(range(0,len(documents)))
random.shuffle(candidates)
candidates = candidates[0:2*len(df)]
assert len(candidates) == 2*len(df)

while len(candidates) >= 2:
 
    pair = (candidates[0], candidates[1])
    
    candidates = candidates[2:]
    
    #print_pairs(pairs, documents, gold_labels, document_ids, detector, explainer)
    update_selection([pair], explainer, detector)

In [None]:
df_random = pd.DataFrame(selection, columns=columns)
df_random.groupby("Explainer")[["Spacy Semantic Similarity: Cosine Similarity Average of Word Vectors (a,b)", "Jaccard Similarity (a,b)", "Cosine Similarity tfidf Vectors"]].mean() 


In [None]:
df_random

In [None]:
df.groupby("Explainer")[["Spacy Semantic Similarity: Cosine Similarity Average of Word Vectors (a,b)", "Jaccard Similarity (a,b)", "Cosine Similarity tfidf Vectors"]].mean() 


In [None]:
from scipy.stats.mstats import ttest_rel
from scipy.stats.mstats import ttest_ind
from scipy.stats.mstats import ttest_1samp

In [None]:
df["Jaccard Similarity (a,b)"].mean()

In [None]:
df_random["Jaccard Similarity (a,b)"].mean()

In [None]:
ttest_rel(df["Jaccard Similarity (a,b)"], df_random["Jaccard Similarity (a,b)"])

In [None]:
for metric in ["Spacy Semantic Similarity: Cosine Similarity Average of Word Vectors (a,b)", "Jaccard Similarity (a,b)", "Cosine Similarity tfidf Vectors"]:
    print(ttest_rel(df[metric], df_random[metric]))

In [None]:
a = documents[document_ids[27]]
b = documents[document_ids[281]]


# Explanation BOW Similarity

In [None]:
df

In [None]:
def get_cos_sim_in_W(dff):
    results = []
    for detector_class in [DetectorGuo, DetectorRadford]:
        detector = detector_class()
        for explainer_class in [LIME_Explainer,SHAP_Explainer]:
            explainer = explainer_class(detector)
            for idx, row in dff.iterrows():
                a = row["Documents Phases 1+3"]
                b = row["Documents Phases 2+4"]
                W, _ = get_explanation_matrix_W([a,b], explainer, quiet=True)
                sim = cosine_similarity(W) 
                sim = sim[0,1]
                results.append((explainer.__class__.__name__, detector.__class__.__name__, sim))
    df_fi_similarity = pd.DataFrame(results, columns=["Explainer", "Detector", "Cosine Similarity a,b in W"])
    df_fi_similarity = df_fi_similarity.set_index(["Explainer", "Detector"])
    return df_fi_similarity

In [None]:
df_fi_similarity_random = get_cos_sim_in_W(df_random)
df_fi_similarity = get_cos_sim_in_W(df)

In [None]:
df_fi_similarity.mean()

In [None]:
df_fi_similarity_random.mean()

In [None]:
ttest_ind(df_fi_similarity["Cosine Similarity a,b in W"], df_fi_similarity_random["Cosine Similarity a,b in W"])

In [None]:
def get_eucledian_distance_in_W(dff):
    results = []
    for detector_class in [DetectorGuo, DetectorRadford]:
        detector = detector_class()
        for explainer_class in [LIME_Explainer,SHAP_Explainer]:
            explainer = explainer_class(detector)
            for idx, row in dff.iterrows():
                a = row["Documents Phases 1+3"]
                b = row["Documents Phases 2+4"]
                W, _ = get_explanation_matrix_W([a,b], explainer, quiet=True)
                sim = euclidean_distances(W) 
                sim = sim[0,1]
                results.append((explainer.__class__.__name__, detector.__class__.__name__, sim))
    df_fi_similarity = pd.DataFrame(results, columns=["Explainer", "Detector", "Eucledian Distance a,b in W"])
    df_fi_similarity = df_fi_similarity.set_index(["Explainer", "Detector"])
    return df_fi_similarity

In [None]:
df_fi_similarity_eucledian_random = get_eucledian_distance_in_W(df_random)
df_fi_similarity_eucledian = get_eucledian_distance_in_W(df)

In [None]:
df_fi_similarity_eucledian.mean()

In [None]:
df_fi_similarity_eucledian_random.mean()

In [None]:
ttest_ind(df_fi_similarity_eucledian["Eucledian Distance a,b in W"], df_fi_similarity_eucledian_random["Eucledian Distance a,b in W"])

In [None]:
for metric in ["Spacy Semantic Similarity: Cosine Similarity Average of Word Vectors (a,b)", "Jaccard Similarity (a,b)", "Cosine Similarity tfidf Vectors"]:
    print(metric)
    print("     ", ttest_rel(df[metric], df_random[metric]))