In [1]:
import ssl

# SECURITY NOTE(review): this swaps the default HTTPS context factory for the
# *unverified* one, so any HTTPS request in this kernel that relies on the
# default context skips TLS certificate validation -- presumably including the
# model downloads below. It relies on private (underscore-prefixed) ssl names.
# Prefer fixing the local certificate store and removing this line.
ssl._create_default_https_context = ssl._create_unverified_context

from allennlp.predictors.predictor import Predictor
# Imported but never referenced by name in this notebook; presumably needed for
# its import-time side effects (making the archived model classes available) --
# TODO confirm.
import allennlp_models.tagging

# Predictor loaded from the public "structured-prediction-srl-bert" archive.
predictor_bert = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz")

# Predictor loaded from the public "openie-model" archive; the variable name
# suggests it is BiLSTM-based -- verify against the model card.
predictor_bilstm = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/openie-model.2020.03.26.tar.gz")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### MFT: Short sentences with one verb.
#### GPT-4 prompt:
Generate 50 sentences that contain exactly one verb. And specify what the verb is using a tab. Do not prepend numbers. Each sentence cannot have more than 1 verb.

Here is sample output that I want:

She loves parties.\tloves

Tell me more.\tTell

It's a beautiful day.\t's

Put your hands behind your back!\tPut

In [2]:
# Load the single-verb MFT data: one "sentence<TAB>verb" pair per line.
# The encoding is pinned to UTF-8 so parsing does not depend on the platform's
# default locale encoding.
with open('data/predicate_identification_1.tsv', 'r', encoding='utf-8') as infile:
    # Skip blank lines (e.g. a trailing newline at end of file); they would
    # otherwise produce a row without a verb column and raise IndexError below.
    contents = [row.strip('\n').split('\t') for row in infile if row.strip()]

# Parsing happens after the file is closed; only the read needs the handle.
sentences = [row[0] for row in contents]
verbs = [row[1] for row in contents]
print(sentences[:3])
print(verbs[:3])

['She reads every day.', 'He studied all night.', 'He went to the store.']
['reads', 'studied', 'went']


In [3]:
def short_sentences(predictor, sentences, verbs):
    """Return the fraction of sentences whose expected verb the predictor misses.

    Each sentence is passed to ``predictor.predict(sentence=...)`` and the
    ``'verb'`` entries of the returned ``'verbs'`` list are collected. A
    sentence counts as an error when its expected verb is not among them; every
    such ``(sentence, verb)`` pair is printed.

    Args:
        predictor: Object exposing ``predict(sentence=str)`` that returns a dict
            with a ``'verbs'`` list of dicts, each having a ``'verb'`` key.
        sentences: Sequence of input sentences.
        verbs: Expected verb for each sentence, aligned with ``sentences``.

    Returns:
        ``errors / len(sentences)`` as a float, or ``0.0`` when ``sentences``
        is empty (there is nothing to fail, and it avoids a division by zero).
    """
    if len(sentences) == 0:
        return 0.0

    errors = 0
    for sentence, verb in zip(sentences, verbs):
        prediction = predictor.predict(sentence=sentence)
        detected_verbs = {detected['verb'] for detected in prediction['verbs']}
        if verb not in detected_verbs:
            errors += 1
            print(repr((sentence, verb)))
    return errors / len(sentences)
            
# Run the single-verb test against both predictors and report each error rate.
# Plain string literals are used: the messages contain no placeholders, so an
# f-prefix would be a no-op.
bert_short_sentences_error_rate = short_sentences(predictor_bert, sentences, verbs)
print('(BERT) The error rate for short sentences with one verb is:', bert_short_sentences_error_rate)

bilstm_short_sentences_error_rate = short_sentences(predictor_bilstm, sentences, verbs)
print('(BILSTM) The error rate for short sentences with one verb is:', bilstm_short_sentences_error_rate)

(BERT) The error rate for short sentences with one verb is: 0.0
(BILSTM) The error rate for short sentences with one verb is: 0.0


### MFT: Sentences with a succession of verbs.
#### GPT-4 prompt:
Generate sentences with an arbitrary length succession of more than one verbs. Then  write the verbs separated by [tab].

Here is an example:
They sacked, pillaged, and burned the village.[tab]sacked[tab]pillaged[tab]burned

In [4]:
# Load the verb-succession MFT data: each line is a sentence followed by its
# expected verbs, all tab-separated.
# The encoding is pinned to UTF-8 because the data contains non-ASCII text
# (e.g. "sautéed"), which would otherwise be decoded with the platform default.
with open('data/predicate_identification_2.tsv', 'r', encoding='utf-8') as infile:
    # Skip blank lines (e.g. a trailing newline at end of file); they would
    # otherwise yield an empty sentence with an empty verb list.
    contents = [row.strip('\n').split('\t') for row in infile if row.strip()]

sentences = [row[0] for row in contents]
verb_lists = [row[1:] for row in contents]
print(sentences[:3])
print(verb_lists[:3])

['I woke, brushed my teeth, and took a shower before leaving for work.', 'She cooked dinner, cleaned the house, and helped the kids with their homework.', 'He ran, jumped, and climbed his way through the obstacle course.']
[['woke', 'brushed', 'took', 'leaving'], ['cooked', 'cleaned', 'helped'], ['ran', 'jumped', 'climbed']]


In [5]:
def verb_succession(predictor, sentences, verb_lists):
    """Return the mean per-sentence rate of expected verbs the predictor misses.

    For each sentence, the ``'verb'`` entries of
    ``predictor.predict(sentence=...)['verbs']`` are collected, and the
    sentence's rate is (expected verbs not among them) / (number of expected
    verbs). Every missed ``(sentence, verb)`` pair is printed.

    Args:
        predictor: Object exposing ``predict(sentence=str)`` that returns a dict
            with a ``'verbs'`` list of dicts, each having a ``'verb'`` key.
        sentences: Sequence of input sentences.
        verb_lists: For each sentence, the list of verbs expected to be found.

    Returns:
        The unweighted mean of the per-sentence miss rates. Sentences with no
        expected verbs are skipped because their rate is undefined (0/0); if no
        sentence can be scored, ``0.0`` is returned.
    """
    error_rates = []
    for sentence, verb_list in zip(sentences, verb_lists):
        expected = len(verb_list)
        if expected == 0:
            # Nothing to look for in this sentence; avoid dividing by zero.
            continue

        prediction = predictor.predict(sentence=sentence)
        detected_verbs = {detected['verb'] for detected in prediction['verbs']}

        errors = 0
        for verb in verb_list:
            if verb not in detected_verbs:
                errors += 1
                print('Verb not detected by model: ', repr((sentence, verb)))
        error_rates.append(errors / expected)

    if not error_rates:
        return 0.0
    return sum(error_rates) / len(error_rates)


# Report the BERT predictor's mean per-sentence miss rate on the verb-succession
# data; verb_succession also prints every expected verb the model did not return.
print("(BERT) The verb succession failure rate is:", verb_succession(predictor_bert, sentences, verb_lists))

Verb not detected by model:  ('She picked, sorted, and delivered the mail to each office.', 'sorted')
Verb not detected by model:  ('The chef chopped, sautéed, and plated the gourmet dish.', 'sautéed')
Verb not detected by model:  ('He folded, packed, and shipped the order to the customer.', 'packed')
Verb not detected by model:  ('The journalist interviewed, transcribed, and reported the breaking news.', 'transcribed')
Verb not detected by model:  ('He scheduled, coordinated, and supervised the construction project.', 'coordinated')
Verb not detected by model:  ('He poured, mixed, and served the refreshing cocktails.', 'mixed')
Verb not detected by model:  ('He checked in, unpacked, and relaxed at the hotel.', 'unpacked')
(BERT) The verb succession failure rate is: 0.033333333333333326


In [6]:
# Same verb-succession evaluation as for BERT, run against predictor_bilstm on
# the same sentences and expected verb lists.
print("(BILSTM) The verb succession failure rate is:", verb_succession(predictor_bilstm, sentences, verb_lists))

Verb not detected by model:  ('She picked, sorted, and delivered the mail to each office.', 'sorted')
Verb not detected by model:  ('The chef chopped, sautéed, and plated the gourmet dish.', 'sautéed')
Verb not detected by model:  ('He folded, packed, and shipped the order to the customer.', 'packed')
Verb not detected by model:  ('The journalist interviewed, transcribed, and reported the breaking news.', 'transcribed')
Verb not detected by model:  ('He scheduled, coordinated, and supervised the construction project.', 'coordinated')
Verb not detected by model:  ('He poured, mixed, and served the refreshing cocktails.', 'mixed')
Verb not detected by model:  ('He checked in, unpacked, and relaxed at the hotel.', 'unpacked')
(BILSTM) The verb succession failure rate is: 0.033333333333333326


### Both models miss the same set of verbs. These seem to be common verbs that are also used as nouns.

In [7]:
# Inspect the raw BERT prediction for one of the failing sentences (the one
# where 'sorted' was reported as not detected).
predictor_bert.predict('She picked, sorted, and delivered the mail to each office.')

{'verbs': [{'verb': 'picked',
   'description': '[ARG0: She] [V: picked] , sorted , and delivered the mail to each office .',
   'tags': ['B-ARG0',
    'B-V',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O']},
  {'verb': 'delivered',
   'description': '[ARG0: She] picked , sorted , and [V: delivered] [ARG1: the mail] [ARG2: to each office] .',
   'tags': ['B-ARG0',
    'O',
    'O',
    'O',
    'O',
    'O',
    'B-V',
    'B-ARG1',
    'I-ARG1',
    'B-ARG2',
    'I-ARG2',
    'I-ARG2',
    'O']}],
 'words': ['She',
  'picked',
  ',',
  'sorted',
  ',',
  'and',
  'delivered',
  'the',
  'mail',
  'to',
  'each',
  'office',
  '.']}

In [8]:
# Inspect the raw prediction from predictor_bilstm for the same failing sentence
# so it can be compared with the BERT output.
predictor_bilstm.predict('She picked, sorted, and delivered the mail to each office.')

{'verbs': [{'verb': 'picked',
   'description': '[ARG0: She] [V: picked] , sorted , and delivered the mail to each office .',
   'tags': ['B-ARG0',
    'B-V',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O']},
  {'verb': 'delivered',
   'description': '[ARG0: She] picked , sorted , and [V: delivered] [ARG1: the mail] [ARG2: to each office] .',
   'tags': ['B-ARG0',
    'O',
    'O',
    'O',
    'O',
    'O',
    'B-V',
    'B-ARG1',
    'I-ARG1',
    'B-ARG2',
    'I-ARG2',
    'I-ARG2',
    'O']}],
 'words': ['She',
  'picked',
  ',',
  'sorted',
  ',',
  'and',
  'delivered',
  'the',
  'mail',
  'to',
  'each',
  'office',
  '.']}