In [1]:
# Load spaCy's transformer-based English pipeline ("en_core_web_trf").
# The cells below add the custom SRL and roleset components to this `nlp` object.
import spacy
nlp = spacy.load("en_core_web_trf")

In [2]:
# Add the SRL component to spaCy's processing pipeline.
# The helper is imported by name (not via `*`) so it is obvious where
# `add_to_pipe` comes from and the notebook namespace is not polluted.
from srl import add_to_pipe
add_to_pipe(nlp)

lerc is not a registered model.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<spacy.lang.en.English at 0x7fba7b02a520>

In [3]:
# Add the roleset component to spaCy's processing pipeline.
# The helper is imported by name (not via `*`) so it is obvious where
# `add_roleset_to_pipe` comes from and the notebook namespace is not polluted.
from roleset import add_roleset_to_pipe
add_roleset_to_pipe(nlp)

lerc is not a registered model.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model: ./psd_model_pos_dep.json


<spacy.lang.en.English at 0x7fba7b02a520>

In [4]:
# example text: two sentences, so per-sentence output can be compared below
text="I take a walk in the park. She went home."
# run the pipeline on the text; `doc` is inspected by all following cells
doc=nlp(text)



### Custom SRL attribute

In [5]:
# custom SRL attribute - Doc
# (the repr below has one entry per predicate, each listing its labelled arguments)
doc._.srl

{take: [V: take, ARGM-PRR: walk],
 walk: [ARG0: I, ARGM-LVB: take, V: walk, ARGM-LOC: in the park],
 went: [ARG0: She, V: went, ARG4: home]}

In [6]:
# custom SRL attribute - Span
# NOTE(review): this displays {} although doc[:7] contains both predicates of
# the first sentence (the slice stops just before the final "."). print_doc
# reads `sent._.srl` on full sentence spans and does get predicates, so the
# Span getter presumably only resolves complete sentences - confirm in srl.py.
doc[:7]._.srl

{}

In [7]:
# custom SRL attribute - Token
# (doc[3] is "walk"; the repr below gives its BIO tag with respect to each listed predicate)
doc[3]._.srl

{take: 'B-ARGM-PRR', walk: 'B-V'}

### Custom roleset attribute

In [8]:
# custom roleset attribute - Doc
# (one entry per token: a roleset id such as 'take.01', or None for tokens without one)
doc._.roleset

[None,
 'take.01',
 None,
 'walk.01',
 None,
 None,
 None,
 None,
 None,
 'go.02',
 None,
 None]

In [9]:
# custom roleset attribute - Span
# (same per-token list as on the Doc, restricted to the tokens of the slice)
doc[:3]._.roleset

[None, 'take.01', None]

In [10]:
# custom roleset attribute - Token
# (doc[3] is "walk"; a single roleset id is displayed for it)
doc[3]._.roleset

'walk.01'

In [11]:
def print_doc(doc, lemma=True, pos=True, srl=True, roleset=True):
    """Print one column-aligned annotation table per sentence of ``doc``.

    Every table starts with a ``Sentence: ...`` line and a ruled header. The
    TEXT column is always shown; the remaining columns are optional.

    Args:
        doc: Object whose ``sents`` yields sentences. Each sentence is
            iterable over tokens and exposes ``text``; tokens expose
            ``text``, ``lemma_``, ``tag_``, ``_.srl`` and ``_.roleset``.
        lemma: Include the LEMMA column (``token.lemma_``).
        pos: Include the POS column (``token.tag_``).
        srl: Include one column per predicate found in ``sent._.srl``,
            headed by the lower-cased predicate text.
        roleset: Include the ROLESET column (``str(token._.roleset)``).

    Returns:
        None. The assembled tables are written to stdout with ``print``.
    """
    gap = 5       # blanks between the widest cell of a column and the next column
    pos_gap = 10  # the POS column keeps its wider gap

    output = ""
    for sent in doc.sents:
        tokens = list(sent)
        if not tokens:
            # max() below cannot handle an empty sentence
            continue

        text_width = max(len(token.text) for token in tokens) + gap
        lemma_width = max(len(token.lemma_) for token in tokens) + gap
        pos_width = max(len(token.tag_) for token in tokens) + pos_gap

        # SRL data is only touched when the SRL columns are requested.
        # Assumes token._.srl is a dict of string tags whose order matches
        # the predicate order of sent._.srl - TODO confirm against srl.py.
        verbs = list(sent._.srl) if srl else []
        tag_rows = [list(token._.srl.values()) if srl else [] for token in tokens]

        # A column must fit its header and every tag printed beneath it;
        # sizing it from the predicate's own tags lets longer tags run into
        # the neighbouring column.
        srl_widths = [len(verb.text.lower()) for verb in verbs]
        for tags in tag_rows:
            for i, tag in enumerate(tags):
                srl_widths[i] = max(srl_widths[i], len(tag))

        # header
        header = "TEXT".ljust(text_width)
        if lemma:
            header += "LEMMA".ljust(lemma_width)
        if pos:
            header += "POS".ljust(pos_width)
        for verb, width in zip(verbs, srl_widths):
            header += verb.text.lower().ljust(width + gap)
        if roleset:
            header += "ROLESET"

        # sentence text + ruled header
        rule = "-" * len(header)
        lines = ["Sentence: " + sent.text, rule, header, rule]

        # token annotation
        for token, tags in zip(tokens, tag_rows):
            row = token.text.ljust(text_width)
            if lemma:
                row += token.lemma_.ljust(lemma_width)
            if pos:
                row += token.tag_.ljust(pos_width)
            for i, tag in enumerate(tags):
                row += tag.ljust(srl_widths[i] + gap)
            if roleset:
                row += str(token._.roleset)
            lines.append(row)

        # every line is newline-terminated; two blank lines separate sentences
        output += "\n".join(lines) + "\n" + "\n" * 2

    print(output)
    

In [12]:
# full table: lemma, POS tag, SRL tags and roleset for every sentence
print_doc(doc)

Sentence: I take a walk in the park.
--------------------------------------------------------------------
TEXT     LEMMA    POS          take           walk           ROLESET
--------------------------------------------------------------------
I        I        PRP          O              B-ARG0         None
take     take     VBP          B-V            B-ARGM-LVB     take.01
a        a        DT           O              O              None
walk     walk     NN           B-ARGM-PRR     B-V            walk.01
in       in       IN           O              B-ARGM-LOC     None
the      the      DT           O              I-ARGM-LOC     None
park     park     NN           O              I-ARGM-LOC     None
.        .        .            O              O              None


Sentence: She went home.
----------------------------------------------
TEXT     LEMMA    POS          went    ROLESET
----------------------------------------------
She      she      PRP          B-ARG0  None
went     g

In [13]:
# only SRL and roleset (the TEXT column is always printed)
print_doc(doc, lemma=False, pos=False)

Sentence: I take a walk in the park.
----------------------------------------------
TEXT     take           walk           ROLESET
----------------------------------------------
I        O              B-ARG0         None
take     B-V            B-ARGM-LVB     take.01
a        O              O              None
walk     B-ARGM-PRR     B-V            walk.01
in       O              B-ARGM-LOC     None
the      O              I-ARGM-LOC     None
park     O              I-ARGM-LOC     None
.        O              O              None


Sentence: She went home.
------------------------
TEXT     went    ROLESET
------------------------
She      B-ARG0  None
went     B-V     go.02
home     B-ARG4  None
.        O       None



