In [21]:
import spacy
from spacy.matcher import Matcher 
from spacy.tokens import Span 
from spacy import displacy
import neuralcoref

In [64]:
class Assessment(object):
    """spaCy pipeline component that flags sentences containing an assessment cue.

    A token ``Matcher`` looks for hedging / opinion phrases such as
    "in my opinion", "it seems to me that" or "my point is that". For every
    sentence that contains at least one such phrase, a ``Span`` covering the
    whole sentence and labelled ``"ASS"`` is stored in ``doc._.opinion``.

    The patterns rely on the ``DEP``, ``LEMMA`` and ``POS`` token attributes
    and on ``Span.sent``, so the component presumably has to run after the
    tagger and parser (it is added with ``last=True`` elsewhere in this file).
    """

    def __init__(self, object):
        """Register the Span extension and compile the assessment patterns.

        Args:
            object: A loaded spaCy pipeline; only its ``vocab`` is used.
                (The name shadows the builtin ``object`` but is kept so that
                existing keyword callers keep working.)
        """
        Span.set_extension("assessment", default = None, force = True)
        self.matcher = Matcher(object.vocab)
        self.matcher.add("Assessment", None,

            # (our|my) (opinion|understanding) (is|was) that
            [{'DEP': 'poss'},
             {'LOWER': {'IN': ['understanding', 'opinion']}},
             {'LEMMA': 'be'},
             {'LOWER': 'that'}],

            # it (is|was) (our|my) (opinion|understanding) (that)?
            [{'LOWER': 'it'},
             {'LEMMA': 'be'},
             {'DEP': 'poss'},
             {'LOWER': {'IN': ['opinion', 'understanding']}},
             {'LOWER': 'that', 'OP': '?'}],

            # in (our|my) opinion
            [{'LOWER': 'in'},
             {'DEP': 'poss'},
             {'LEMMA': 'opinion'}],

            # (our|my) take on
            [{'DEP': 'poss'},
             {'LOWER': 'take'},
             {'LOWER': 'on'}],

            # it (seems|seemed) to (us|me) (that)?
            [{'LOWER': 'it'},
             {'LEMMA': 'seem'},
             {'LOWER': 'to'},
             {'POS': 'PRON'},
             {'LOWER': 'that', 'OP': '?'}],

            # it (seems|seemed) (that)?
            [{'LOWER': 'it'},
             {'LEMMA': 'seem'},
             {'LOWER': 'that', 'OP': '?'}],

            # it would seem to (us|me)?
            [{'LOWER': 'it'},
             {'LOWER': 'would'},
             {'LOWER': 'seem'},
             {'LOWER': 'to'},
             {'POS': 'PRON', 'OP': '?'}],

            # it would appear to (us|me)?
            [{'LOWER': 'it'},
             {'LOWER': 'would'},
             {'LOWER': 'appear'},
             {'LOWER': 'to'},
             {'POS': 'PRON', 'OP': '?'}],

            # it (appear|appears|appeared) to (us|me)?
            [{'LOWER': 'it'},
             {'LEMMA': 'appear'},
             {'LOWER': 'to'},
             {'POS': 'PRON', 'OP': '?'}],

            # (the|my|our) ([\w]+[ ])?point (is|was) (that)?
            [{'DEP': {'IN': ['poss', 'det']}},
             {'IS_ALPHA': True, 'OP': '?'},
             {'LOWER': 'point'},
             {'LEMMA': 'be'},
             {'LOWER': 'that', 'OP': '?'}]

        )

    def __call__(self, doc):
        """Tag every sentence of ``doc`` that contains an assessment phrase.

        Args:
            doc: The spaCy ``Doc`` being processed by the pipeline.

        Returns:
            The same ``doc``; ``doc._.opinion`` holds one ``"ASS"``-labelled
            sentence span per flagged sentence, in match order.
        """
        # Let the component work even when nobody registered the Doc
        # extension beforehand (has_extension/set_extension are class-level).
        if not doc.has_extension("opinion"):
            doc.set_extension("opinion", default=None)

        # Copy instead of appending in place: the registered default may be a
        # list object shared between documents, or a non-list placeholder.
        opinions = list(doc._.opinion or [])
        seen = {(span.start, span.end) for span in opinions}

        for match_id, start, end in self.matcher(doc):
            sentence = doc[start:end].sent
            bounds = (sentence.start, sentence.end)
            # Patterns with optional trailing tokens fire more than once for
            # the same phrase; keep a single span per sentence.
            if bounds in seen:
                continue
            seen.add(bounds)
            opinions.append(Span(doc, sentence.start, sentence.end, label = "ASS"))

        doc._.opinion = opinions
        return doc

In [3]:
class spacy_arglex(object):
    """Wrapper around an ``en_core_web_sm`` pipeline with the Assessment component.

    Constructing an instance loads the model, registers the custom
    Doc/Span/Token extensions and appends an ``Assessment`` component as the
    last pipe. The ready pipeline is exposed as ``self.nlp``.
    """

    def __init__(self, object):
        """Load the model and wire up extensions and the custom component.

        Args:
            object: The imported ``spacy`` module; its ``load`` function and
                ``tokens`` submodule are used.
        """
        self.nlp = object.load("en_core_web_sm")
        tokens = object.tokens

        # Document-level: list of flagged sentence spans.
        tokens.Doc.set_extension("opinion", default = [], force = True)

        # Span-level: placeholder attribute, defaults to None.
        tokens.Span.set_extension("assessment", default = None, force = True)

        # Token-level: True when the token's lemma is one of these words.
        # 'want' and 'love' are listed twice; this is harmless for a
        # membership test.
        emotion_lemmas = ('like', 'adore', 'want', 'prefer', 'love', 'enjoy',
                          'adoration', 'want', 'preference', 'love', 'enjoyment',
                          'hate', 'dislike', 'disprefer', 'dispreference')

        def has_emotion_lemma(token):
            return token.lemma_ in emotion_lemmas

        tokens.Token.set_extension("is_emo", getter = has_emotion_lemma, force = True)

        # The assessment tagger goes at the end of the pipeline.
        self.nlp.add_pipe(Assessment(self.nlp), last = True)

In [15]:
# Load the stock small English pipeline; no custom component is added here.
nlp = spacy.load('en_core_web_sm')

In [16]:
# Sample text with two assessment cues; `doc` is later passed to the stand-alone matcher.
doc = nlp("It was my understanding that burgers are life. However, I do not like seafood. It would seem to me that this is tasty")

In [4]:
# Build the wrapper: its constructor loads the model and appends the Assessment component.
test = spacy_arglex(spacy)

In [5]:
# Run the wrapped pipeline so the Assessment component fills a._.opinion.
a = test.nlp("It was my understanding that burgers are life. However, I do not like seafood. It would seem to me that this is tasty")

In [6]:
# Label of the first sentence span stored in a._.opinion.
a._.opinion[0].label_

'ASS'

In [18]:
# Re-register the Doc-level "opinion" extension (force=True replaces any earlier registration).
# NOTE(review): other cells register this extension with default=[]; False is not
# list-like, so code that appends to doc._.opinion without replacing it first will
# fail while this registration is active — confirm the intended default.
from spacy.tokens import Doc
Doc.set_extension("opinion", default = False, force = True)

In [78]:
def add_event_ent(matcher, doc, i, matches):
    """Matcher ``on_match``-style callback that records the i-th match as an opinion.

    Args:
        matcher: The ``Matcher`` that produced the matches (unused).
        doc: The ``Doc`` the matches refer to. The Doc extension "opinion"
            must already be registered.
        i: Index of the match to record.
        matches: Sequence of ``(match_id, start, end)`` triples.
    """
    match_id, start, end = matches[i]
    opinion = Span(doc, start, end, label="Assessment")
    # Custom attributes live under `doc._`, not directly on the Doc. Build a
    # new list rather than using `+=`, so a falsy default (None / False) works
    # and a shared default list is never mutated in place.
    doc._.opinion = list(doc._.opinion or []) + [opinion]

In [280]:
# Re-check the label of the first stored opinion span (same expression as the earlier cell).
a._.opinion[0].label_

'ASS'

In [208]:
# Stand-alone copy of the assessment patterns, for experimenting with the
# Matcher outside of the pipeline component.
assessment_patterns = [
    # (our|my) (opinion|understanding) (is|was) that
    [
        {'DEP': 'poss'},
        {'LOWER': {'IN': ['understanding', 'opinion']}},
        {'LEMMA': 'be'},
        {'LOWER': 'that'},
    ],
    # it (is|was) (our|my) (opinion|understanding) (that)?
    [
        {'LOWER': 'it'},
        {'LEMMA': 'be'},
        {'DEP': 'poss'},
        {'LOWER': {'IN': ['opinion', 'understanding']}},
        {'LOWER': 'that', 'OP': '?'},
    ],
    # in (our|my) opinion
    [
        {'LOWER': 'in'},
        {'DEP': 'poss'},
        {'LEMMA': 'opinion'},
    ],
    # (our|my) take on
    [
        {'DEP': 'poss'},
        {'LOWER': 'take'},
        {'LOWER': 'on'},
    ],
    # it (seems|seemed) to (us|me) (that)?
    [
        {'LOWER': 'it'},
        {'LEMMA': 'seem'},
        {'LOWER': 'to'},
        {'POS': 'PRON'},
        {'LOWER': 'that', 'OP': '?'},
    ],
    # it (seems|seemed) (that)?
    [
        {'LOWER': 'it'},
        {'LEMMA': 'seem'},
        {'LOWER': 'that', 'OP': '?'},
    ],
    # it would seem to (us|me)?
    [
        {'LOWER': 'it'},
        {'LOWER': 'would'},
        {'LOWER': 'seem'},
        {'LOWER': 'to'},
        {'POS': 'PRON', 'OP': '?'},
    ],
    # it would appear to (us|me)?
    [
        {'LOWER': 'it'},
        {'LOWER': 'would'},
        {'LOWER': 'appear'},
        {'LOWER': 'to'},
        {'POS': 'PRON', 'OP': '?'},
    ],
    # it (appear|appears|appeared) to (us|me)?
    [
        {'LOWER': 'it'},
        {'LEMMA': 'appear'},
        {'LOWER': 'to'},
        {'POS': 'PRON', 'OP': '?'},
    ],
    # (the|my|our) ([\w]+[ ])?point (is|was) (that)?
    [
        {'DEP': {'IN': ['poss', 'det']}},
        {'IS_ALPHA': True, 'OP': '?'},
        {'LOWER': 'point'},
        {'LEMMA': 'be'},
        {'LOWER': 'that', 'OP': '?'},
    ],
]

matcher = Matcher(nlp.vocab)
# All patterns share one match key and no on_match callback.
matcher.add("assessment", None, *assessment_patterns)

In [209]:
# Apply the stand-alone matcher; each result is a (match_id, start, end) triple.
matches = matcher(doc)

In [270]:
# End index of the sentence that contains token 4 of `a`.
a[4].sent.end

9

In [226]:
# Distinct (match_id, start) pairs: collapses matches that begin at the same token.
list(set([i[0:2] for i in matches]))

[(16732015447806033820, 0), (16732015447806033820, 17)]

In [227]:
# (match_id, start) for every raw match, duplicates included.
[i[0:2] for i in matches]

[(16732015447806033820, 0),
 (16732015447806033820, 0),
 (16732015447806033820, 17),
 (16732015447806033820, 17)]

In [None]:
# The match whose end index is largest.
max(matches, key=lambda x: x[2])

In [65]:
def arglex(object):
    """Build an ``en_core_web_sm`` pipeline with the Assessment component appended.

    Args:
        object: The imported ``spacy`` module; its ``load`` function and
            ``tokens`` submodule are used.

    Returns:
        The loaded pipeline, with a pipe named ``'Assessment'`` added last.
    """
    pipeline = object.load('en_core_web_sm')

    # Register the custom attributes (force=True overwrites earlier registrations).
    tokens = object.tokens
    tokens.Doc.set_extension("opinion", default = [], force = True)
    tokens.Span.set_extension("assessment", default = None, force = True)

    # Append the assessment tagger as the final pipeline step.
    assessment_component = Assessment(pipeline)
    pipeline.add_pipe(assessment_component, name = 'Assessment', last = True)
    return pipeline

In [66]:
# Rebind `nlp` to a pipeline that includes the Assessment component.
nlp = arglex(spacy)

In [None]:
# NOTE(review): this cell has no execution count. Running it rebinds `nlp` to a
# plain pipeline without the Assessment component, so the cells below would no
# longer find any opinion spans — consider removing it before a full re-run.
nlp = spacy.load('en_core_web_sm')

In [67]:
# Parse a sentence that opens with an assessment cue ("my understanding was that").
doc = nlp('my understanding was that this is cool.')

In [70]:
# Label of the first sentence span stored in doc._.opinion.
doc._.opinion[0].label_

'ASS'