In [1]:
import ssl
import os

# NOTE(security): this swaps the ssl module's default HTTPS context factory for
# the unverified one, so any code in this process that relies on the default
# context no longer verifies server certificates. It relies on private ssl
# attributes and must stay above the model loading below if it is meant to
# affect those downloads. Presumably a workaround for a local certificate
# problem -- TODO(review): confirm it is still needed and remove it if not.
ssl._create_default_https_context = ssl._create_unverified_context

from allennlp.predictors.predictor import Predictor
# Not referenced by name anywhere in this cell; presumably imported for its
# side effects so the archives below can be loaded -- TODO confirm.
import allennlp_models.tagging

# Predictor built from the "structured-prediction-srl-bert" archive (2020.12.15).
predictor_bert = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz")

# Predictor built from the "openie-model" archive (2020.03.26); the variable
# name suggests a BiLSTM-based model -- verify against the archive's config.
predictor_bilstm = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/openie-model.2020.03.26.tar.gz")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [2]:
import json

# Load the active/passive sentence pairs. The encoding is pinned to UTF-8 so
# the result does not depend on the platform's default locale encoding.
with open('data/voice_inv.json', 'r', encoding='utf-8') as infile:
    sentences = json.load(infile)

# Quick sanity check: show one example pair and the dataset size.
print(sentences[-3])
print(len(sentences), "sentences")

{'active': 'The pilot flew the plane across the ocean.', 'passive': 'The plane was flown across the ocean by the pilot.'}
89 sentences


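### INV: Active/Passive voice

Invariance test: the set of tags predicted for the main verb of a sentence should stay the same when the sentence is rewritten from active to passive voice.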

In [3]:
def voice_inv(predictor, sentences, model_name):
    """Compute the failure rate of the active/passive invariance test.

    Each sentence pair is run through ``predictor.predict`` in both voices and
    the *set* of tags of the main verb is compared. A pair is a failure when:

    * the active prediction contains no verbs at all,
    * the passive prediction does not contain exactly one more verb than the
      active one (the extra verb being the auxiliary, e.g. 'was'), or
    * the two tag sets differ.

    As a side effect, every input pair, both raw predictions and the failure
    flag are written to ``model_outputs/voice_inv_{model_name}.json``.

    Args:
        predictor: Object whose ``predict(sentence)`` returns a dict with a
            ``'verbs'`` list; each entry provides a ``'verb'`` string and a
            ``'tags'`` list.
        sentences: Iterable of dicts with ``'active'`` and ``'passive'`` keys.
        model_name: Label used in the name of the output JSON file.

    Returns:
        The fraction of failing pairs, between 0 and 1. Returns ``0.0`` when
        ``sentences`` is empty.
    """
    failures = []
    model_outputs = []

    for sentence in sentences:
        active = sentence['active']
        passive = sentence['passive']

        active_pred = predictor.predict(active)
        passive_pred = predictor.predict(passive)

        active_verbs = active_pred['verbs']
        passive_verbs = passive_pred['verbs']

        # First failure case: the passive must have exactly one more verb than
        # the active (we subtract 1 for the auxiliary 'was'/'were'). An active
        # prediction without any verb has nothing to compare, so it is a
        # failure too instead of an IndexError further down.
        if not active_verbs or len(active_verbs) != len(passive_verbs) - 1:
            failure = True
        else:
            active_set = set(active_verbs[0]['tags'])

            # Skip the auxiliary when it is reported as the first verb; the
            # length check above guarantees index 1 exists in that case.
            if passive_verbs[0]['verb'].lower() in ('was', 'were'):
                passive_set = set(passive_verbs[1]['tags'])
            else:
                passive_set = set(passive_verbs[0]['tags'])

            failure = active_set != passive_set

        failures.append(int(failure))

        # Add output to model_outputs
        model_outputs.append({
            'input': {'active': active, 'passive': passive},
            'output': {'active_pred': active_pred, 'passive_pred': passive_pred},
            'failure': failure
        })

    # Write model_outputs to a JSON file
    os.makedirs("model_outputs", exist_ok=True)
    with open(f'model_outputs/voice_inv_{model_name}.json', 'w') as f:
        json.dump(model_outputs, f, indent=4)

    # Guard against division by zero when there were no sentence pairs.
    if not failures:
        return 0.0
    return sum(failures) / len(failures)
   
# Run the invariance test for each model and report the failure rate in percent.
bert_failure_rate = voice_inv(predictor_bert, sentences, 'BERT')
print("(BERT) INV to active/passive voice failure rate:", 100 * bert_failure_rate)

bilstm_failure_rate = voice_inv(predictor_bilstm, sentences, 'BiLSTM')
print("(BiLSTM) INV to active/passive voice failure rate:", 100 * bilstm_failure_rate)

(BERT) INV to active/passive voice failure rate: 12.359550561797752
(BiLSTM) INV to active/passive voice failure rate: 20.224719101123593
