In [1]:
from allennlp_models.pretrained import load_predictor
from allennlp_models.pretrained import get_pretrained_models

import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb
from checklist.test_types import MFT, INV, DIR
from checklist.expect import Expect

from checklist.pred_wrapper import PredictorWrapper
import pandas as pd

from nltk.corpus import wordnet as wn
import random
# Allow up to 1000 characters per cell when DataFrames are displayed, so the
# generated sentences and SRL descriptions are shown in full.
# The fully qualified option name is used: pandas resolves abbreviated names by
# pattern matching, which may break if a similarly named option is ever added.
pd.set_option('display.max_colwidth', 1000)

In [2]:
# Number of sentences requested from each template. This value is passed to
# make_data_locative() and make_data_directive() further down; change it here
# to resize both test sets.
nsamples = 100

In [3]:
def get_tags_v0(sentence):
    """Return the tag sequence of the first predicate found in `sentence`.

    The sentence is passed to the module-level `srl_predictor` (assigned in a
    later cell, so that cell must run before this function is called).  Only
    the first entry of the predictor's 'verbs' list is used.

    Returns:
        The value stored under 'tags' for the first verb, or an empty list
        when the predictor reports no verbs at all.
    """
    verbs = srl_predictor.predict(sentence)['verbs']
    return verbs[0]['tags'] if verbs else []

def get_descr_v0(sentence):
    """Return the 'description' entry of the first predicate found in `sentence`.

    The sentence is passed to the module-level `srl_predictor` (assigned in a
    later cell, so that cell must run before this function is called).  Only
    the first entry of the predictor's 'verbs' list is used.

    Returns:
        The value stored under 'description' for the first verb.  When the
        predictor reports no verbs, an empty *list* is returned (not an empty
        string), so callers see a different type in that case.
    """
    verbs = srl_predictor.predict(sentence)['verbs']
    if not verbs:
        return []
    return verbs[0]['description']

def find_final_min_one_arg(list_of_arg):
    """Return the second-to-last element of `list_of_arg`.

    In this notebook the argument is a tag list produced by get_tags_v0.
    The penultimate tag is used rather than the last one; presumably the last
    tag belongs to the sentence-final full stop (TODO confirm against the
    predictor's tokenisation).

    Args:
        list_of_arg: sequence of tags; may be empty.

    Returns:
        The element at index -2, or the sentinel string 'missed_verb' when
        there is no such element.  That covers both the empty list (no verb
        was found) and a one-element list, which previously raised IndexError.
    """
    # An empty/None value and a single-element list both lack a penultimate tag.
    if list_of_arg and len(list_of_arg) >= 2:
        return list_of_arg[-2]
    return 'missed_verb'

def find_first_arg(list_of_arg):
    """Return the first element of `list_of_arg`.

    In this notebook the argument is a tag list produced by get_tags_v0.

    Returns:
        The element at index 0, or the sentinel string 'missed_verb' when the
        list is empty (i.e. no verb was found).
    """
    return list_of_arg[0] if list_of_arg else 'missed_verb'

def inverse_topic(sentence):
    string_split = sentence.split()
    loc = string_split[-3:]
    loc[0]= loc[0].capitalize()
    loc[2] = loc[2][:-1]
    loc[2] = loc[2]+','
    not_loc = string_split[:-3]
    not_loc[0] = not_loc[0].lower()
    rebuilt_sentence_list = loc + not_loc
    rebuilt_sentence = " ".join(rebuilt_sentence_list)+"."
    return(rebuilt_sentence)

def make_data_locative(nsamples):
    """Generate sentences of the form "Only <first_name> sleeps in that <place>."

    A fresh checklist `Editor` fills the template.  `{place}` is drawn from the
    fixed list below; `{first_name}` is not supplied here, so it is presumably
    filled from the Editor's built-in lexicon (TODO confirm).

    Args:
        nsamples: forwarded to `Editor.template` as the number of samples.

    Returns:
        A flat list containing the items of the template result's 'data' entry.
    """
    sleeping_places = ['bed', 'hut', 'park', 'tent', 'cage']

    filled = Editor().template(
        "Only {first_name} sleeps in that {place}.",
        place=sleeping_places,
        meta=True,
        nsamples=nsamples,
        remove_duplicates=True,
    )
    # Copy into a plain list so callers always receive a list.
    return list(filled['data'])

def make_data_directive(nsamples):
    """Generate sentences of the form "The <subj> went to the <place>."

    A fresh checklist `Editor` fills the template from the fixed subject,
    verb and place lists defined below.

    Args:
        nsamples: forwarded to `Editor.template` as the number of samples.

    Returns:
        A flat list containing the items of the template result's 'data' entry.
    """
    subjects = ['girls', 'boys', 'men', 'women', 'animals', 'horses']
    verbs = ['went to']
    destinations = ['classroom', 'party', 'park', 'tent', 'cage', 'building',
                    'future', 'past', 'wedding', 'funeral']

    filled = Editor().template(
        "The {subj} {verb} the {place}.",
        subj=subjects,
        place=destinations,
        verb=verbs,
        meta=True,
        nsamples=nsamples,
        remove_duplicates=True,
    )
    # Copy into a plain list so callers always receive a list.
    return list(filled['data'])

## In the cells below, the functions are applied to the data (normal / inverted). All output is stored in separate columns. After that, all rows where the tags for the normal and the inverted sentence differ are shown, and the failure rate is calculated by dividing the number of rows with a wrongly predicted tag by the length of the original dataframe.

In [4]:
# Build both test sets once; every model evaluated below is run on these same sentences.
list_data_locative = make_data_locative(nsamples=nsamples)
list_data_directive = make_data_directive(nsamples=nsamples)

In [None]:
# Load the BERT SRL model into the global that get_tags_v0 / get_descr_v0 read.
srl_predictor = load_predictor('structured-prediction-srl-bert')

# Column 0 holds the generated (non-inverted) locative sentences.
df_locative = pd.DataFrame(list_data_locative)

# Predictions for the original word order. Of the tags, keep the
# second-to-last one without its first two characters (presumably the
# "B-"/"I-" prefix).
df_locative['tags_normal'] = df_locative[0].apply(get_tags_v0)
df_locative['descr_normal'] = df_locative[0].apply(get_descr_v0)
df_locative['final-1_tag_normal'] = (
    df_locative['tags_normal'].apply(find_final_min_one_arg).str.slice(2)
)

# Predictions for the inverted sentence. Here the first tag is the one
# compared, again without its first two characters.
df_locative['inverted'] = df_locative[0].apply(inverse_topic)
df_locative['tags_inverted'] = df_locative['inverted'].apply(get_tags_v0)
df_locative['descr_inverted'] = df_locative['inverted'].apply(get_descr_v0)
df_locative['first_tag_inverted'] = (
    df_locative['tags_inverted'].apply(find_first_arg).str.slice(2)
)

df_locative.head()

lerc is not a registered model.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Rows where the compared tag differs between the normal and the inverted
# sentence count as failures (BERT model, locative set).
errors_locative_bert = df_locative.loc[
    df_locative['final-1_tag_normal'].ne(df_locative['first_tag_inverted']),
    [0, 'descr_normal', 'descr_inverted'],
]
# Failure rate = failing rows / all rows.
failure_rate_locative_bert = errors_locative_bert.shape[0] / df_locative.shape[0]
errors_locative_bert

In [None]:
# Column 0 holds the generated (non-inverted) directive sentences.
# This cell uses whichever model is currently stored in `srl_predictor`.
df_directive = pd.DataFrame(list_data_directive)

# Predictions for the original word order. Of the tags, keep the
# second-to-last one without its first two characters (presumably the
# "B-"/"I-" prefix).
df_directive['tags_normal'] = df_directive[0].apply(get_tags_v0)
df_directive['descr_normal'] = df_directive[0].apply(get_descr_v0)
df_directive['final-1_tag_normal'] = (
    df_directive['tags_normal'].apply(find_final_min_one_arg).str.slice(2)
)

# Predictions for the inverted sentence. Here the first tag is the one
# compared, again without its first two characters.
df_directive['inverted'] = df_directive[0].apply(inverse_topic)
df_directive['tags_inverted'] = df_directive['inverted'].apply(get_tags_v0)
df_directive['descr_inverted'] = df_directive['inverted'].apply(get_descr_v0)
df_directive['first_tag_inverted'] = (
    df_directive['tags_inverted'].apply(find_first_arg).str.slice(2)
)

df_directive.head()

In [None]:
# Rows where the compared tag differs between the normal and the inverted
# sentence count as failures (BERT model, directive set).
errors_directive_bert = df_directive.loc[
    df_directive['final-1_tag_normal'].ne(df_directive['first_tag_inverted']),
    [0, 'descr_normal', 'descr_inverted'],
]
# Failure rate = failing rows / all rows.
failure_rate_directive_bert = errors_directive_bert.shape[0] / df_directive.shape[0]
errors_directive_bert


In [None]:
# Switch the global predictor to the second SRL model (the one whose results
# are stored under the *_bilstm names below).
srl_predictor = load_predictor('structured-prediction-srl')

# Rebuild the locative frame from scratch; this replaces the frame that held
# the previous model's predictions.
df_locative = pd.DataFrame(list_data_locative)

# Predictions for the original word order. Of the tags, keep the
# second-to-last one without its first two characters (presumably the
# "B-"/"I-" prefix).
df_locative['tags_normal'] = df_locative[0].apply(get_tags_v0)
df_locative['descr_normal'] = df_locative[0].apply(get_descr_v0)
df_locative['final-1_tag_normal'] = (
    df_locative['tags_normal'].apply(find_final_min_one_arg).str.slice(2)
)

# Predictions for the inverted sentence. Here the first tag is the one
# compared, again without its first two characters.
df_locative['inverted'] = df_locative[0].apply(inverse_topic)
df_locative['tags_inverted'] = df_locative['inverted'].apply(get_tags_v0)
df_locative['descr_inverted'] = df_locative['inverted'].apply(get_descr_v0)
df_locative['first_tag_inverted'] = (
    df_locative['tags_inverted'].apply(find_first_arg).str.slice(2)
)

df_locative.head()

In [None]:
# Rows where the compared tag differs between the normal and the inverted
# sentence count as failures (BiLSTM results, locative set).
errors_locative_bilstm = df_locative.loc[
    df_locative['final-1_tag_normal'].ne(df_locative['first_tag_inverted']),
    [0, 'descr_normal', 'descr_inverted'],
]
# Failure rate = failing rows / all rows.
failure_rate_locative_bilstm = errors_locative_bilstm.shape[0] / df_locative.shape[0]
errors_locative_bilstm

In [None]:
# Show one example row in full (all columns). Row 27 is inspected when it
# exists; the index is clamped to the last row so the cell still runs when
# fewer than 28 sentences were generated (e.g. after lowering `nsamples`).
df_locative.iloc[min(27, len(df_locative) - 1)]

In [None]:
# Rebuild the directive frame from scratch; this replaces the frame that held
# the previous model's predictions. The model used is whichever one is
# currently stored in `srl_predictor`.
df_directive = pd.DataFrame(list_data_directive)

# Predictions for the original word order. Of the tags, keep the
# second-to-last one without its first two characters (presumably the
# "B-"/"I-" prefix).
df_directive['tags_normal'] = df_directive[0].apply(get_tags_v0)
df_directive['descr_normal'] = df_directive[0].apply(get_descr_v0)
df_directive['final-1_tag_normal'] = (
    df_directive['tags_normal'].apply(find_final_min_one_arg).str.slice(2)
)

# Predictions for the inverted sentence. Here the first tag is the one
# compared, again without its first two characters.
df_directive['inverted'] = df_directive[0].apply(inverse_topic)
df_directive['tags_inverted'] = df_directive['inverted'].apply(get_tags_v0)
df_directive['descr_inverted'] = df_directive['inverted'].apply(get_descr_v0)
df_directive['first_tag_inverted'] = (
    df_directive['tags_inverted'].apply(find_first_arg).str.slice(2)
)

df_directive.head()

In [None]:
# Rows where the compared tag differs between the normal and the inverted
# sentence count as failures (BiLSTM results, directive set).
errors_directive_bilstm = df_directive.loc[
    df_directive['final-1_tag_normal'].ne(df_directive['first_tag_inverted']),
    [0, 'descr_normal', 'descr_inverted'],
]
# Failure rate = failing rows / all rows.
failure_rate_directive_bilstm = errors_directive_bilstm.shape[0] / df_directive.shape[0]
errors_directive_bilstm

In [None]:
# Summary of all four failure rates: a label line followed by the value on its own line.
print("error rate locative BiLSTM:", failure_rate_locative_bilstm, sep="\n")
print("error rate directive BiLSTM:", failure_rate_directive_bilstm, sep="\n")
print("error rate locative BERT:", failure_rate_locative_bert, sep="\n")
print("error rate directive BERT:", failure_rate_directive_bert, sep="\n")