In [1]:
%%capture
%load_ext autoreload
%autoreload 2

# Windows Environment Simulation
from dotenv import load_dotenv
load_dotenv()

### (ExPred) Load the pretrained model

In [2]:
%%capture
from src.expred import (seeding, ExpredInput,
                        BertTokenizerWithSpans, ExpredConfig, Expred)
from src.expred.models import (prepare_for_cl, train_evidence_classifier,
                               train_mtl_token_identifier)
from transformers import BertTokenizer

# Build the ExPred configuration: 'fever' is named as both the pretrained and
# the base dataset, device is set to 'cpu', and load_from_pretrained is enabled.
expred_config = ExpredConfig(
    pretrained_dataset_name='fever',
    base_dataset_name='fever',
    device='cpu',
    load_from_pretrained=True)

# Fix the seed (1234) before the tokenizer and the model are created
seeding(1234)

# Span-aware tokenizer loaded under the 'bert-base-uncased' checkpoint name
tokenizer = BertTokenizerWithSpans.from_pretrained('bert-base-uncased')

# Create the model from the configuration above.
# NOTE(review): .eval() presumably switches the model to inference mode
# (torch-style) — confirm against the Expred class.
expred = Expred.from_pretrained(expred_config)
expred.eval()


## Evaluation of subsequences
The black-box ExPred model that we test for short-cuts can be applied to automate fact-checking.
The dataset curated for that purpose is called FEVER (Fact Extraction and VERification).

In [3]:
# Load the FEVER subset that holds every occurrence of the subsequence under test
import pandas as pd

evaluations = pd.read_json("making_it_the.json")

print(f"Loaded {len(evaluations)} items")

Loaded 8 items


In [4]:
# Create a function to simplify execution of ExPred
def evaluate_query_with_expred(current_query, evidence):
    """Classify one claim against one evidence text with the loaded ExPred model.

    Uses the notebook-level ``expred_config``, ``tokenizer`` and ``expred``
    objects, so those must exist before this function is called.

    Args:
        current_query: The claim, as a string; it is split on whitespace.
        evidence: The evidence document, as a string; it is split on whitespace.

    Returns:
        A two-item list ``[current_query, label]`` where ``label`` is the first
        decoded classification prediction for this input.
    """
    # Wrap the single query/document pair in the input structure ExPred accepts
    model_input = ExpredInput(
        queries=[current_query.split()],
        docs=[evidence.split()],
        labels=['SUPPORTS', 'REFUTES'],
        config=expred_config,
        ann_ids=['spontan_1'],
        span_tokenizer=tokenizer,
    )
    # Preprocessing has to happen before the input is handed to the model
    model_input.preprocess()

    model_output = expred(model_input)

    # Only one query was submitted, so the first decoded prediction is its label
    decoded_labels = model_input.get_decoded_cls_preds(model_output)
    predicted_label = decoded_labels[0]
    return [current_query, predicted_label]

### Subsequence: 'making it the'

First we check how ExPred labels the unmodified queries for all occurrences of the subsequence.
From the observations it can be seen that most cases of the subset containing the subsequence are supported.

In [5]:
# Baseline: run ExPred on every unmodified query that contains the subsequence.
# The two columns are walked positionally; evaluations[col][i] is a label
# lookup and raises KeyError whenever the frame's index is not exactly 0..n-1.
results = [
    evaluate_query_with_expred(current_query, current_doc)
    for current_query, current_doc in zip(evaluations['query'], evaluations['evidences'])
    if current_query  # rows with an empty query are skipped
]

# Number of queries actually evaluated; this is the denominator used for the
# percentage below, so skipped rows must not be counted.
n = len(results)

df = pd.DataFrame(results, columns=['query', 'label'])
df


Unnamed: 0,query,label
0,Lisbon's administrative limits contain a popul...,SUPPORTS
1,Titanic had an initial worldwide gross of over...,SUPPORTS
2,Luxembourg is a representative democracy with ...,SUPPORTS
3,"Thailand's total area is approximately 513,000...",SUPPORTS
4,Southport is 14.8 miles southwest of Preston m...,REFUTES
5,"The United States has 324 million people, maki...",REFUTES
6,Resident Evil 7: Biohazard has sold over 3 mil...,SUPPORTS
7,"Iran comprises a land area of 1,648,195 square...",SUPPORTS


Percentage of confirmed cases:

In [6]:
# Share of evaluated queries labelled SUPPORTS, taken over the rows that are
# actually in df rather than over a separately tracked counter.
(df['label'] == 'SUPPORTS').mean() * 100

75.0

## Adversarial Attack
We replace the subsequence with a negated variant, so that the meaning of each query is flipped.

In [7]:
# Adversarial rewrite: negate the subsequence so the meaning of each query flips
substring = "making it the"
replacewith = "not making it the"
n = len(evaluations)

# Walk the two columns positionally; evaluations[col][i] is a label lookup and
# raises KeyError whenever the frame's index is not exactly 0..n-1.
results = [
    evaluate_query_with_expred(current_query.replace(substring, replacewith), current_doc)
    for current_query, current_doc in zip(evaluations['query'], evaluations['evidences'])
]

df = pd.DataFrame(results, columns=['query', 'label'])
df

Unnamed: 0,query,label
0,Lisbon's administrative limits contain a popul...,REFUTES
1,Titanic had an initial worldwide gross of over...,REFUTES
2,Luxembourg is a representative democracy with ...,REFUTES
3,"Thailand's total area is approximately 513,000...",REFUTES
4,Southport is 14.8 miles southwest of Preston n...,REFUTES
5,"The United States has 324 million people, not ...",REFUTES
6,Resident Evil 7: Biohazard has sold over 3 mil...,REFUTES
7,"Iran comprises a land area of 1,648,195 square...",REFUTES


## Conclusion

This experiment suggests that 'making it the' is a <span style="color:green">valid short-cut for the ExPred model</span>, as there is a very high probability that ExPred supports a query containing this subsequence. The percentage of attacked queries that ExPred labels REFUTES is computed as follows:

In [8]:
# Percentage of attacked queries that ExPred labels REFUTES
(df['label'].values == 'REFUTES').sum() / n * 100

100.0