In [1]:
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

from allennlp.predictors.predictor import Predictor
import allennlp_models.tagging

predictor_bert = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz")

predictor_bilstm = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/openie-model.2020.03.26.tar.gz")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [8]:
predictor_bert.predict(sentence="They went on a hike in the woods.")

{'verbs': [{'verb': 'went',
   'description': '[ARG0: They] [V: went] [ARG1: on a hike in the woods] .',
   'tags': ['B-ARG0',
    'B-V',
    'B-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'O']}],
 'words': ['They', 'went', 'on', 'a', 'hike', 'in', 'the', 'woods', '.']}

### INV: Em dash statements.

In [9]:
import json
with open('data/argument_identification_em_dash_inv.json', 'r') as infile:
    sentences = json.loads(infile.read())
print(sentences[0])

{'original': 'I went to the store yesterday.', 'em_dash': 'I went to the store—where I found a great sale—yesterday.'}


In [46]:
# Define a function to extract the em dash (works with sentence that contain only one of such phrase)
def extract_em_dash(sent):
    # Check if the em dash exists in the sentence
    if '—' in sent:
        # Split the sentence into chunks using the em dash as a separator
        chunks = sent.split('—')
        # Return the second chunk (i.e., the text after the em dash)
        return chunks[1]
    
# Define a function to compute the failure rate of the predictor for sentences with em dashes
def em_dash_inv(predictor, sentences):
    # Initialize a list to store failure rates
    failure_rates = []
    # Iterate through the sentences
    for sentence in sentences:
        # Extract the original and em dash versions of the sentence
        original = sentence['original']
        dashed = sentence['em_dash']
        # Extract the contents of the em dash
        em_dash = extract_em_dash(dashed)
        
        # Make predictions for the original and em dash sentences
        original_pred = predictor.predict(original)
        dashed_pred = predictor.predict(dashed)
        
        # Initialize sets to store argument detections for each sentence version
        original_argument_detections = set()
        dashed_argument_detections = set()
        
        
        # Extract arguments from the original sentence predictions
        for predicate in original_pred['verbs']:
            verb = predicate['verb']
            tags = predicate['tags']
            
            # Iterate through the tokens and tags in the prediction
            for token, tag in zip(original_pred['words'], tags):
                # Add arguments to the set of detections for the original sentence
                if 'ARG' in tag:
                    original_argument_detections.add((verb, token))
                            
        # Extract arguments from the em dash sentence predictions
        for predicate in dashed_pred['verbs']:
            verb = predicate['verb']
            # Check if the verb is not in the em dash text
            if verb not in em_dash:
                tags = predicate['tags']
                include = True
                # Iterate through the tokens and tags in the prediction
                for token, tag in zip(dashed_pred['words'], tags):
                    # Toggle the inclusion flag when encountering an em dash
                    if '—' in token:
                        include = not include

                    # Add arguments to the set of detections for the dashed sentence
                    if 'ARG' in tag and include:
                        dashed_argument_detections.add((verb, token))
                    
        # Compare the number of argument detections in each version and append the result to the failure rates list
        failure_rates.append(len(dashed_argument_detections) != len(original_argument_detections))
                    
    # Calculate the overall failure rate by dividing the sum of failures by the total number of sentences
    return (sum(failure_rates) / len(failure_rates))

print("(BERT) INV - Argument detection invariance to em dash phrases failure rate: ", em_dash_inv(predictor_bert, sentences))
print("(BILSTM) INV - Argument detection invariance to em dash phrases failure rate :", em_dash_inv(predictor_bilstm, sentences))


(BERT) INV - Argument detection invariance to em dash phrases failure rate:  0.9433962264150944
(BILSTM) INV - Argument detection invariance to em dash phrases failure rate : 0.9622641509433962


In [16]:
predictor_bilstm.predict_tokenized(["I", "am", "happy"])

{'verbs': [{'verb': 'am',
   'description': '[ARG1: I] [V: am] [ARG2: happy]',
   'tags': ['B-ARG1', 'B-V', 'B-ARG2']}],
 'words': ['I', 'am', 'happy']}

In [31]:
extract_em_dash("Hello-world")

''

In [35]:
ord('—')

8212